/**
 * Redistribution and use of this software and associated documentation
 * ("Software"), with or without modification, are permitted provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain copyright
 *    statements and notices.  Redistributions must also contain a
 *    copy of this document.
 *
 * 2. Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions and the
 *    following disclaimer in the documentation and/or other
 *    materials provided with the distribution.
 *
 * 3. The name "Exolab" must not be used to endorse or promote
 *    products derived from this Software without prior written
 *    permission of Intalio, Inc.  For written permission,
 *    please contact info@exolab.org.
 *
 * 4. Products derived from this Software may not be called "Exolab"
 *    nor may "Exolab" appear in their names without prior written
 *    permission of Intalio, Inc. Exolab is a registered
 *    trademark of Intalio, Inc.
 *
 * 5. Due credit should be given to the Exolab Project
 *    (http://www.exolab.org/).
 *
 * THIS SOFTWARE IS PROVIDED BY INTALIO, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * INTALIO, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright 1999-2002 (C) Intalio, Inc. All Rights Reserved.
 *
 * $Id$
 */
45  
46  package org.exolab.castor.xml.schema.reader;
47  
48  import java.io.IOException;
49  import java.io.Reader;
50  
51  import org.apache.commons.logging.Log;
52  import org.apache.commons.logging.LogFactory;
53  import org.exolab.castor.net.URIException;
54  import org.exolab.castor.net.URILocation;
55  import org.exolab.castor.net.URIResolver;
56  import org.exolab.castor.util.NestedIOException;
57  import org.exolab.castor.xml.XMLException;
58  import org.exolab.castor.xml.schema.Schema;
59  import org.exolab.castor.xml.schema.SchemaContext;
60  import org.exolab.castor.xml.schema.SchemaContextImpl;
61  import org.xml.sax.EntityResolver;
62  import org.xml.sax.ErrorHandler;
63  import org.xml.sax.InputSource;
64  import org.xml.sax.Parser;
65  import org.xml.sax.SAXException;
66  import org.xml.sax.SAXParseException;
67  
68  /**
69   * A class for reading XML Schemas.
70   * 
71   * @author <a href="mailto:kvisco@intalio.com">Keith Visco</a>
72   * @version $Revision$ $Date: 2004-10-05 14:27:10 -0600 (Tue, 05 Oct
73   *          2004) $
74   **/
75  @SuppressWarnings("deprecation")
76  public class SchemaReader {
77  
78     /**
79      * The {@link Log} instance to use.
80      */
81     private static final Log LOG = LogFactory.getLog(SchemaReader.class);
82  
83     /**
84      * The Castor XML Context... mother of all.
85      */
86     private SchemaContext _schemaContext;
87  
88     /**
89      * XML Parser instance
90      */
91     private Parser _parser = null;
92  
93     /**
94      * SAX InputSource to Schema
95      */
96     private InputSource _source = null;
97  
98     /**
99      * SAX EntityResolver
100     */
101    private EntityResolver _resolver = null;
102 
103    /**
104     * SAX ErrorHandler
105     */
106    private ErrorHandler _errorHandler = null;
107 
108    /**
109     * The resolver to be used for resolving href
110     */
111    private URIResolver _uriResolver;
112 
113    /**
114     * A flag that indicates that included schemas should be cached instead of
115     * being inlined [which is the default behavior as specified by the XML
116     * Schema Specification].
117     * 
118     */
119    private boolean _cacheIncludedSchemas = false;
120 
121    private Schema _schema = null;
122 
123    private boolean _validate = true;
124 
125    /**
126     * Old fashion style to create a SchemaReader instance.
127     * 
128     * @throws IOException
129     *            if no Parser is available
130     */
131    private void init() throws IOException {
132       // get default parser from Configuration
133       _schemaContext = new SchemaContextImpl();
134 
135       Parser parser = _schemaContext.getParser();
136 
137       if (parser == null) {
138          String message = "fatal error: unable to create SAX parser.";
139          LOG.warn(message);
140          throw new IOException(message);
141       }
142 
143       _parser = parser;
144    }
145 
146    /**
147     * Creates a new SchemaReader for the given InputSource
148     * 
149     * @param source
150     *           the InputSource to read the Schema from.
151     */
152    public SchemaReader(InputSource source) throws IOException {
153       init();
154 
155       if (source == null)
156          throw new IllegalArgumentException("InputSource cannot be null");
157 
158       _source = source;
159 
160    }
161 
162    /**
163     * Creates a new SchemaReader for the given Reader
164     * 
165     * @param reader
166     *           the Reader to read the Schema from.
167     * @param filename
168     *           for reporting errors.
169     **/
170    public SchemaReader(Reader reader, String filename) throws IOException {
171       init();
172 
173       if (reader == null) {
174          String err = "The argument 'reader' must not be null.";
175          throw new IllegalArgumentException(err);
176       }
177 
178       _source = new InputSource(reader);
179       if (filename == null)
180          filename = reader.toString();
181       _source.setPublicId(filename);
182 
183    }
184 
185    /**
186     * Creates a new SchemaReader for the given URL
187     * 
188     * @param url
189     *           the URL string
190     **/
191    public SchemaReader(String url) throws IOException {
192       init();
193       if (url == null) {
194          String err = "The argument 'url' must not be null.";
195          throw new IllegalArgumentException(err);
196       }
197       _source = new InputSource(url);
198 
199    }
200 
201    /**
202     * New style how to create a SchemaReader instance, requiring that
203     * {@link SchemaContext} and InputSource are set before calling {@link read}.
204     */
205    public SchemaReader() {
206       super();
207    }
208 
209    /**
210     * To set the {@link SchemaContext} to be used. Also resets the parser as it
211     * depends of the {@link SchemaContext}.
212     * 
213     * @param schemaContext
214     *           the {@link SchemaContext} to be used
215     */
216    public void setSchemaContext(final SchemaContext schemaContext) {
217       this._schemaContext = schemaContext;
218 
219       Parser p = _schemaContext.getParser();
220       if (p != null) {
221          _parser = p;
222       }
223    }
224 
225    /**
226     * A different way to create a SchemaReader by using an empty constructor and
227     * setting the InputSource afterwards.
228     * 
229     * @param inputSource
230     *           the InputSource to read the schema from
231     */
232    public void setInputSource(final InputSource inputSource) {
233       if (inputSource == null) {
234          String message = "InputSource must not be null";
235          LOG.warn(message);
236          throw new IllegalArgumentException(message);
237       }
238       _source = inputSource;
239    }
240 
241    /**
242     * Reads the Schema from the source and returns the Schema object model.
243     * 
244     * <BR />
245     * <B>Note:</B> Subsequent calls to this method will simply return a cached
246     * copy of the Schema object. To read a new Schema object, create a new
247     * Reader.
248     * 
249     * @return the new Schema created from the source of this SchemaReader
250     **/
251   public Schema read() throws IOException {
252       if (_schema != null) {
253          return _schema;
254       }
255       if (_parser == null) {
256          String message = "Required Parser was not specified";
257          LOG.warn(message);
258          throw new IllegalStateException(message);
259       }
260       if (_source == null) {
261          String message = "Required Source was not specified";
262          LOG.warn(message);
263          throw new IllegalStateException(message);
264       }
265       SchemaUnmarshaller schemaUnmarshaller = null;
266 
267       try {
268          SchemaUnmarshallerState state = new SchemaUnmarshallerState();
269          // TODO[Joachim] state.setConfiguration(_config);
270          state.cacheIncludedSchemas = _cacheIncludedSchemas;
271          schemaUnmarshaller = new SchemaUnmarshaller(_schemaContext, state);
272          if (_uriResolver != null)
273             schemaUnmarshaller.setURIResolver(_uriResolver);
274 
275          // make sure we mark the URI as processed for cyclic imports/includes
276          String uri = _source.getSystemId();
277          if (uri != null) {
278             URIResolver resolver = schemaUnmarshaller.getURIResolver();
279             try {
280                URILocation location = resolver.resolve(uri, null);
281                if (location != null)
282                   uri = location.toString();
283             } catch (URIException except) {
284                throw new NestedIOException(except);
285             }
286             state.markAsProcessed(uri, schemaUnmarshaller.getSchema());
287          }
288 
289          Sax2ComponentReader handler = new Sax2ComponentReader(schemaUnmarshaller);
290          _parser.setDocumentHandler(handler);
291 
292          if (_errorHandler == null)
293             _parser.setErrorHandler(handler);
294          else
295             _parser.setErrorHandler(_errorHandler);
296 
297          if (_resolver != null)
298             _parser.setEntityResolver(_resolver);
299          _parser.parse(_source);
300       } catch (XMLException ex) {
301          handleException(ex);
302       } catch (org.xml.sax.SAXException sx) {
303          handleException(sx);
304       }
305 
306       _schema = schemaUnmarshaller.getSchema();
307 
308       if (_validate) {
309          try {
310             _schema.validate();
311          } catch (org.exolab.castor.xml.ValidationException vx) {
312             throw new NestedIOException(vx);
313          }
314       }
315 
316       return _schema;
317 
318    }
319 
320    /**
321     * Sets the ErrorHandler.
322     * 
323     * @param errorHandler
324     **/
325    public void setErrorHandler(ErrorHandler errorHandler) {
326       _errorHandler = errorHandler;
327    }
328 
329    /**
330     * Sets wheter or not to cache the included xml schemas instead of inlining
331     * them as specified by the XML Schema specification.
332     * 
333     * @param cache
334     *           true to cache the included XML Schemas.
335     **/
336    public void setCacheIncludedSchemas(boolean cache) {
337       _cacheIncludedSchemas = cache;
338    }
339 
340    /**
341     * Sets whether or not post-read validation should occur. By default,
342     * validation is enabled. Note that certain read validation cannot be
343     * disabled.
344     * 
345     * @param validate
346     *           a boolean that when true will force a call to Schema#validate
347     *           after the schema is read.
348     **/
349    public void setValidation(boolean validate) {
350       _validate = validate;
351    }
352 
353    /**
354     * Sets the EntityResolver used to resolve SYSTEM Identifier. If the entity
355     * resolver is null, the default one will be used.
356     * 
357     * @param resolver
358     *           the EntityResolver to use.
359     */
360    public void setEntityResolver(EntityResolver resolver) {
361       _resolver = resolver;
362    }
363 
364    /**
365     * Sets the URIResolver used to resolve hrefs. If the entity resolver is
366     * null, the default one will be used.
367     * 
368     * @param uriresolver
369     *           the URIResolver to use.
370     */
371    public void setURIResolver(URIResolver uriresolver) {
372       _uriResolver = uriresolver;
373    }
374 
375    /**
376     * Handle an exception which is one of our own XMLExceptions.
377     * 
378     * @param xmlException
379     *           the XMLException to handle.
380     * @throws IOException
381     */
382    private void handleException(XMLException xmlException) throws IOException {
383       throw new NestedIOException(xmlException);
384    }
385 
386    /**
387     * Handle an exception which is a foreign SAXException.
388     * 
389     * @param sx
390     *           The SAXException to handle.
391     * @throws IOException
392     */
393    private void handleException(SAXException sx) throws IOException {
394       Exception except = sx.getException();
395       if (except == null) {
396          except = sx;
397       } else if (except instanceof SAXParseException) {
398          SAXParseException spe = (SAXParseException) except;
399          String filename = spe.getSystemId();
400          if (filename == null)
401             filename = spe.getPublicId();
402          if (filename == null)
403             filename = "<filename unavailable>";
404 
405          String err = spe.getMessage();
406 
407          err += "; " + filename + " [ line: " + spe.getLineNumber();
408          err += ", column: " + spe.getColumnNumber() + ']';
409          throw new NestedIOException(err, except);
410       } else if (except instanceof XMLException) {
411          handleException((XMLException) except);
412       }
413 
414       throw new NestedIOException(except);
415 
416    }
417 }