1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 package org.exolab.castor.xml.dtd;
47
48 import java.io.FileInputStream;
49 import java.io.FileOutputStream;
50 import java.io.IOException;
51 import java.io.InputStreamReader;
52 import java.io.OutputStreamWriter;
53 import java.io.Reader;
54 import java.io.StringReader;
55 import java.io.Writer;
56 import java.util.Enumeration;
57 import java.util.HashMap;
58 import java.util.Iterator;
59 import java.util.Map;
60
61 import org.apache.commons.cli.CommandLine;
62 import org.apache.commons.cli.CommandLineParser;
63 import org.apache.commons.cli.GnuParser;
64 import org.apache.commons.cli.HelpFormatter;
65 import org.apache.commons.cli.Option;
66 import org.apache.commons.cli.OptionBuilder;
67 import org.apache.commons.cli.Options;
68 import org.apache.commons.logging.Log;
69 import org.apache.commons.logging.LogFactory;
70 import org.exolab.castor.xml.dtd.parser.DTDInitialParser;
71 import org.exolab.castor.xml.dtd.parser.DTDParser;
72 import org.exolab.castor.xml.dtd.parser.InputCharStream;
73 import org.exolab.castor.xml.dtd.parser.ParseException;
74 import org.exolab.castor.xml.dtd.parser.TokenMgrError;
75 import org.exolab.castor.xml.schema.Annotation;
76 import org.exolab.castor.xml.schema.AttributeDecl;
77 import org.exolab.castor.xml.schema.ComplexType;
78 import org.exolab.castor.xml.schema.ContentType;
79 import org.exolab.castor.xml.schema.Documentation;
80 import org.exolab.castor.xml.schema.ElementDecl;
81 import org.exolab.castor.xml.schema.Facet;
82 import org.exolab.castor.xml.schema.FacetFactory;
83 import org.exolab.castor.xml.schema.Group;
84 import org.exolab.castor.xml.schema.Order;
85 import org.exolab.castor.xml.schema.Particle;
86 import org.exolab.castor.xml.schema.Schema;
87 import org.exolab.castor.xml.schema.SchemaException;
88 import org.exolab.castor.xml.schema.SimpleType;
89 import org.exolab.castor.xml.schema.SimpleTypesFactory;
90 import org.exolab.castor.xml.schema.Wildcard;
91 import org.exolab.castor.xml.schema.writer.SchemaWriter;
92 import org.xml.sax.SAXException;
93
94
95
96
97
98
99
100
101
102
103
104 public class Converter {
105
106 private static final Log log = LogFactory.getLog(Converter.class);
107
108 public static final String NAME_SPACE_PREFIX_KEY = "nameSpacePrefixKey";
109
110 public static final String NAME_SPACE_KEY = "nameSpaceKey";
111
112 public static final String DEFAULT_NAME_SPACE_PREFIX = "tns";
113
114 public static final String DEFAULT_NAME_SPACE = "generated.castor.org";
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141 public static void main(String args[]) throws IOException, DTDException,
142 SchemaException, SAXException {
143
144 StringBuffer header = new StringBuffer();
145 header.append("\n");
146 header.append("Converts a DTD to an XML schema.\n\n");
147 header.append(" <DTD>: Name of the input DTD file.\n");
148 header.append(" <XSD>: Name of the output XML schema file.\n");
149 header.append("\n");
150 header.append("Options:");
151
152 Options options = new Options();
153
154 Option targetNamespace = OptionBuilder
155 .withDescription("target namespace of the XML schema generated")
156 .isRequired(false)
157 .withLongOpt("targetNamespace")
158 .hasArg()
159 .withArgName("[prefix:]uri")
160 .create("tns");
161
162 Option xmlns = OptionBuilder
163 .withDescription("xml namespace declarations")
164 .isRequired(false)
165 .hasArgs()
166 .withValueSeparator(',')
167 .withArgName("[[prefix:]uri]*")
168 .create("xmlns");
169
170 options.addOption(targetNamespace);
171 options.addOption(xmlns);
172 options.addOption("h", "help", false, "prints usage information");
173 options.addOption("e", "encoding", false, "character encoding");
174
175 CommandLineParser parser = new GnuParser();
176 CommandLine line = null;
177 try {
178 line = parser.parse(options, args);
179 } catch (org.apache.commons.cli.ParseException e) {
180 System.err.println( "Parsing failed. Reason: " + e.getMessage() );
181 }
182
183 if (args.length < 2 || line.hasOption("help")) {
184 HelpFormatter formatter = new HelpFormatter();
185 formatter.printHelp("org.exolab.castor.xml.dtd.Converter <DTD> <XSD>",
186 header.toString(),
187 options,
188 "");
189 return;
190 }
191
192 String encoding = "US-ASCII";
193 String targetNameSpace = DEFAULT_NAME_SPACE;
194 Map<String, String> nameSpaceMap = new HashMap<String, String>();
195
196 if (line.hasOption("tns")) {
197 log.info("Found option -tns ...");
198 Map<String, String> nameSpaceMapTemp = parseNamespace(line.getOptionValue("tns"));
199 targetNameSpace = nameSpaceMapTemp.get(NAME_SPACE_KEY);
200 nameSpaceMap.put(nameSpaceMapTemp.get(NAME_SPACE_PREFIX_KEY), targetNameSpace);
201 }
202
203 if (line.hasOption("xmlns")) {
204 log.info("Found option -xmlns ...");
205 Map<String, String> nameSpaceMapTemp = parseNamespace(line.getOptionValue("xmlns"));
206 nameSpaceMap.put(nameSpaceMapTemp.get(NAME_SPACE_PREFIX_KEY),
207 nameSpaceMapTemp.get(NAME_SPACE_KEY));
208 }
209
210 if (line.hasOption("encoding")) {
211 log.info("Found option -encoding ...");
212 String encodingValue = line.getOptionValue("encoding");
213 if (encodingValue.equalsIgnoreCase("ascii")
214 || args[2].equalsIgnoreCase("us-ascii")) {
215 encoding = "US-ASCII";
216 } else if (encodingValue.equalsIgnoreCase("utf-8")) {
217 encoding = "UTF-8";
218 } else if (encodingValue.equalsIgnoreCase("utf-16")) {
219 encoding = "UTF-16";
220 } else {
221 encoding = encodingValue.toUpperCase();
222 }
223 }
224
225 String inFile = args[0];
226 String outFile = args[1];
227
228 Converter convertor = new Converter();
229 convertor.process(inFile, outFile, encoding, targetNameSpace, nameSpaceMap);
230 }
231
232
233
234
235
236
237
238
239
240
241
242
243 public void process(final String dtdFile, final String schemaFile, final String encoding,
244 final String targetNamespace,
245 final Map<String, String> namespaces) throws SchemaException, DTDException, IOException, SAXException {
246
247
248 FileInputStream inputStream = new FileInputStream(dtdFile);
249
250
251 InputStreamReader reader = new InputStreamReader(inputStream, encoding);
252
253
254 FileOutputStream outputStream = new FileOutputStream(schemaFile);
255
256
257 OutputStreamWriter writer = new OutputStreamWriter(outputStream, encoding);
258
259 process (reader, writer, encoding, targetNamespace, namespaces);
260 }
261
262
263
264
265
266
267
268
269
270
271
272
273 public void process(final Reader in, final Writer out, final String encoding,
274 final String targetNamespace,
275 final Map<String, String> namespaces) throws SchemaException, DTDException, IOException, SAXException {
276
277
278 convertDTDtoSchema(in, out, targetNamespace, namespaces);
279
280 in.close();
281 out.close();
282 }
283
284 protected static Map<String, String> parseNamespace(final String nameSpaceArg) {
285
286 Map<String, String> nameSpaceMap = new HashMap<String, String>();
287
288 if ("".equals(nameSpaceArg.substring("-tns=".length()))) {
289 throw new RuntimeException("name space argument is emty, Spaces after '='? ");
290 }
291
292 String[] tnsToken = nameSpaceArg.substring("-tns=".length()).split(":", 2);
293
294 if (isNameSpacePrefix(tnsToken[0])) {
295 nameSpaceMap.put(NAME_SPACE_PREFIX_KEY, tnsToken[0]);
296 nameSpaceMap.put(NAME_SPACE_KEY, tnsToken[1]);
297 } else {
298 nameSpaceMap.put(NAME_SPACE_KEY, nameSpaceArg.substring("-tns=".length()));
299 nameSpaceMap.put(NAME_SPACE_PREFIX_KEY, "tns");
300 }
301
302 return nameSpaceMap;
303
304 }
305
306 protected static boolean isNameSpacePrefix(String nameSpacePrefix2test) {
307
308 if (!nameSpacePrefix2test.matches("[a-z]*")) {
309 return false;
310 }
311
312 if (nameSpacePrefix2test.matches("https?")) {
313 return false;
314 }
315
316 return true;
317
318 }
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342 public void convertDTDtoSchema(Reader reader, Writer writer,
343 String targetNameSpace, Map<String, String> nameSpaceMap)
344 throws DTDException, SchemaException, IOException, SAXException {
345
346
347 DTDdocument dtd = parseDTD(reader);
348
349
350 Schema schema = convertDTDObjectToSchemaObject(dtd, targetNameSpace,
351 nameSpaceMap);
352
353
354 marshalSchema(schema, writer);
355
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377 public void convertDTDtoSchema(Reader reader, Writer writer)
378 throws DTDException, SchemaException, IOException, SAXException {
379
380 String targetNameSpace = DEFAULT_NAME_SPACE;
381 Map<String, String> nameSpaceMap = new HashMap<String, String>();
382
383 convertDTDtoSchema(reader, writer, targetNameSpace, nameSpaceMap);
384 }
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404 public DTDdocument parseDTD(Reader reader) throws DTDException {
405 try {
406
407 InputCharStream charStream;
408
409
410
411 charStream = new InputCharStream(reader);
412
413
414 DTDInitialParser initialParser = new DTDInitialParser(charStream);
415
416
417
418 String intermedResult = initialParser.Input();
419
420
421 StringReader strReader = new StringReader(intermedResult);
422
423
424 charStream = new InputCharStream(strReader);
425
426
427 DTDParser parser = new DTDParser(charStream);
428
429
430
431 DTDdocument dtd = parser.Input();
432
433 strReader.close();
434
435
436 return dtd;
437 } catch (TokenMgrError tme) {
438 String msg = tme.getMessage();
439 throw new DTDException("TokenMgrError"
440 + (msg == null ? "" : ": " + msg));
441 } catch (ParseException pe) {
442 String msg = pe.getMessage();
443 throw new DTDException("ParseException"
444 + (msg == null ? "" : ": " + msg));
445 }
446 }
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463 public Schema convertDTDObjectToSchemaObject(DTDdocument dtd,
464 String targetNamespace, Map nameSpaceMap) throws DTDException,
465 SchemaException {
466
467 Schema schema = new Schema();
468
469 String name = dtd.getName();
470 if (name != null && !name.equals("")) {
471 schema.setId(name);
472 }
473
474 schema.setTargetNamespace(targetNamespace);
475
476 for (Iterator<String> namespaces = nameSpaceMap.keySet().iterator(); namespaces
477 .hasNext();) {
478 String xmlns = namespaces.next();
479 schema.addNamespace(xmlns, (String) nameSpaceMap.get(xmlns));
480 }
481
482
483 Enumeration dtdNotations = dtd.getNotations();
484
485 while (dtdNotations.hasMoreElements()) {
486 dtdNotations.nextElement();
487
488
489 }
490
491
492
493
494
495
496 Enumeration dtdGeneralEntities = dtd.getGeneralEntities();
497 if (dtdGeneralEntities.hasMoreElements()) {
498 GeneralEntity ge;
499 Annotation annotation = new Annotation();
500 Documentation documentation;
501 String text;
502
503 while (dtdGeneralEntities.hasMoreElements()) {
504 ge = (GeneralEntity) dtdGeneralEntities.nextElement();
505 documentation = new Documentation();
506
507 text = "General Entity Declaration";
508 documentation.add(text);
509 documentation.add(ge);
510 annotation.addDocumentation(documentation);
511 }
512
513 schema.addAnnotation(annotation);
514 }
515
516
517
518 Enumeration<Element> dtdElements = dtd.getElements();
519 Element dtdElement;
520 ElementDecl schemaElement;
521
522 while (dtdElements.hasMoreElements()) {
523 dtdElement = dtdElements.nextElement();
524 schemaElement = convertDTDElementToSchemaElement(dtdElement, schema);
525 schema.addElementDecl(schemaElement);
526 }
527
528 return schema;
529
530 }
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548 public ElementDecl convertDTDElementToSchemaElement(
549 Element dtdElement, Schema schema) throws DTDException,
550 SchemaException {
551
552 String name = dtdElement.getName();
553 if (name == null || name.equals("")) {
554 String err = "DTD to Schema converter: a DTD element has no name.";
555 throw new DTDException(err);
556 }
557 ElementDecl schemaElement = new ElementDecl(schema, name);
558
559
560 ComplexType complexType = schema.createComplexType();
561 ContentType contentType = null;
562 Group group = null;
563 Iterator mixedChildrenIterator = null;
564 String elementRef = null;
565 ElementDecl elem = null;
566
567 if (dtdElement.isEmptyContent()) {
568
569
570 contentType = ContentType.elemOnly;
571
572
573 } else if (dtdElement.isAnyContent()) {
574
575
576 contentType = ContentType.mixed;
577
578
579 group = new Group();
580 group.setOrder(Order.sequence);
581 group.setMinOccurs(0);
582 group.setMaxOccurs(-1);
583 Wildcard any = new Wildcard(group);
584 group.addWildcard(any);
585 complexType.addGroup(group);
586
587 } else if (dtdElement.isElemOnlyContent()) {
588
589
590 contentType = ContentType.elemOnly;
591
592
593 ContentParticle dtdContent = dtdElement.getContent();
594 if (dtdContent == null) {
595 String err = "DTD to Schema converter: element \""
596 + dtdElement.getName();
597 err += "\" has no content.";
598 throw new DTDException(err);
599 }
600
601 Particle content = null;
602 try {
603 content = convertContentParticle(dtdContent, schema);
604 } catch (DTDException e) {
605 String err = "DTD to Schema converter: content of DTD element \""
606 + dtdElement.getName();
607 err += "\", represented by a Content Particle, is malformed.";
608 throw new DTDException(err);
609 }
610
611 if (content instanceof ElementDecl) {
612 group = new Group();
613 group.setOrder(Order.sequence);
614 group.addElementDecl((ElementDecl) content);
615 complexType.addGroup(group);
616 } else {
617 complexType.addGroup((Group) content);
618 }
619
620 } else if (dtdElement.isMixedContent()) {
621
622
623 contentType = ContentType.mixed;
624
625
626 mixedChildrenIterator = dtdElement.getMixedContentChildren();
627 if ((mixedChildrenIterator != null)
628 && (mixedChildrenIterator.hasNext())) {
629 group = new Group();
630 group.setOrder(Order.choice);
631 group.setMinOccurs(0);
632 group.setMaxOccurs(-1);
633 while (mixedChildrenIterator.hasNext()) {
634 elementRef = (String) mixedChildrenIterator.next();
635 elem = new ElementDecl(schema);
636 elem.setReferenceName(elementRef);
637 group.addElementDecl(elem);
638 }
639 complexType.addGroup(group);
640 }
641
642 } else {
643 String err = "DTD to Schema converter: content type of DTD element \""
644 + dtdElement.getName();
645 err += "\" has not been specified.";
646 throw new DTDException(err);
647 }
648 complexType.setContentType(contentType);
649
650
651
652 Enumeration dtdAttributes = dtdElement.getAttributes();
653 Attribute dtdAttribute;
654 AttributeDecl schemaAttribute;
655
656 while (dtdAttributes.hasMoreElements()) {
657 dtdAttribute = (Attribute) dtdAttributes.nextElement();
658 schemaAttribute = convertAttribute(dtdAttribute, schema);
659 complexType.addAttributeDecl(schemaAttribute);
660 }
661
662
663 schemaElement.setType(complexType);
664 return schemaElement;
665
666 }
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688 public Particle convertContentParticle(ContentParticle dtdContent,
689 Schema schema) throws DTDException, SchemaException {
690
691 Particle returnValue;
692
693 if (dtdContent.isReferenceType()) {
694
695 ElementDecl elem = new ElementDecl(schema);
696 elem.setReferenceName(dtdContent.getReference());
697 returnValue = elem;
698
699 } else if (dtdContent.isSeqType() || dtdContent.isChoiceType()) {
700
701 Group group = new Group();
702 if (dtdContent.isSeqType())
703 group.setOrder(Order.sequence);
704 else
705 group.setOrder(Order.choice);
706
707 Enumeration<ContentParticle> children = dtdContent.getChildren();
708 ContentParticle child;
709 Particle contentParticle;
710
711 while (children.hasMoreElements()) {
712 child = children.nextElement();
713 contentParticle = convertContentParticle(child, schema);
714
715 if (contentParticle instanceof ElementDecl) {
716 group.addElementDecl((ElementDecl) contentParticle);
717 } else {
718 group.addGroup((Group) contentParticle);
719 }
720 }
721
722 returnValue = group;
723
724 } else {
725 throw new DTDException();
726 }
727
728 if (dtdContent.isOneOccurance()) {
729 returnValue.setMinOccurs(1);
730 returnValue.setMaxOccurs(1);
731 } else if (dtdContent.isOneOrMoreOccurances()) {
732 returnValue.setMinOccurs(1);
733 returnValue.setMaxOccurs(-1);
734 } else if (dtdContent.isZeroOrMoreOccurances()) {
735 returnValue.setMinOccurs(0);
736 returnValue.setMaxOccurs(-1);
737 } else if (dtdContent.isZeroOrOneOccurance()) {
738 returnValue.setMinOccurs(0);
739 returnValue.setMaxOccurs(1);
740 } else {
741
742
743 }
744
745 return returnValue;
746
747 }
748
749
750
751
752
753
754
755
756
757
758
759
760 public AttributeDecl convertAttribute(Attribute dtdAttribute,
761 Schema schema) throws DTDException {
762
763 AttributeDecl schemaAttribute = new AttributeDecl(schema, dtdAttribute
764 .getName());
765
766 SimpleType type = null;
767
768 if (dtdAttribute.isStringType()) {
769 type = schema.getSimpleType(schema
770 .getBuiltInTypeName(SimpleTypesFactory.STRING_TYPE));
771 } else if (dtdAttribute.isIDType()) {
772 type = schema.getSimpleType(schema
773 .getBuiltInTypeName(SimpleTypesFactory.ID_TYPE));
774 } else if (dtdAttribute.isIDREFType()) {
775 type = schema.getSimpleType(schema
776 .getBuiltInTypeName(SimpleTypesFactory.IDREF_TYPE));
777 } else if (dtdAttribute.isIDREFSType()) {
778 type = schema.getSimpleType(schema
779 .getBuiltInTypeName(SimpleTypesFactory.IDREFS_TYPE));
780 } else if (dtdAttribute.isENTITYType()) {
781 type = schema.getSimpleType(schema
782 .getBuiltInTypeName(SimpleTypesFactory.ENTITY_TYPE));
783 } else if (dtdAttribute.isENTITIESType()) {
784 type = schema.getSimpleType(schema
785 .getBuiltInTypeName(SimpleTypesFactory.ENTITIES_TYPE));
786 } else if (dtdAttribute.isNMTOKENType()) {
787 type = schema.getSimpleType(schema
788 .getBuiltInTypeName(SimpleTypesFactory.NMTOKEN_TYPE));
789 } else if (dtdAttribute.isNMTOKENSType()) {
790 type = schema.getSimpleType(schema
791 .getBuiltInTypeName(SimpleTypesFactory.NMTOKENS_TYPE));
792 } else if (dtdAttribute.isNOTATIONType()) {
793 type = schema.createSimpleType(null, schema
794 .getBuiltInTypeName(SimpleTypesFactory.NOTATION_TYPE),
795 "restriction");
796 Iterator<String> values = dtdAttribute.getValues();
797 FacetFactory facetFactory = FacetFactory.getInstance();
798 while (values.hasNext()) {
799 Facet facet = facetFactory.createFacet(Facet.ENUMERATION,
800 values.next());
801 facet.setOwningType(type);
802 type.addFacet(facet);
803 }
804
805 } else if (dtdAttribute.isEnumerationType()) {
806 type = schema.createSimpleType(null, schema
807 .getBuiltInTypeName(SimpleTypesFactory.NMTOKEN_TYPE),
808 "restriction");
809 Iterator<String> values = dtdAttribute.getValues();
810 FacetFactory facetFactory = FacetFactory.getInstance();
811 while (values.hasNext()) {
812 Facet facet = facetFactory.createFacet(Facet.ENUMERATION,
813 values.next());
814 facet.setOwningType(type);
815 type.addFacet(facet);
816 }
817 } else {
818 String err = "DTD to Schema converter: DTD attribute \""
819 + dtdAttribute.getName();
820 err += "\" has unspecified type.";
821 throw new DTDException(err);
822 }
823
824 schemaAttribute.setSimpleType(type);
825
826 if (dtdAttribute.isREQUIRED()) {
827 schemaAttribute.setUse(AttributeDecl.USE_REQUIRED);
828 } else if (dtdAttribute.isIMPLIED()) {
829 schemaAttribute.setUse(AttributeDecl.USE_OPTIONAL);
830 } else if (dtdAttribute.isFIXED()) {
831
832 } else {
833 schemaAttribute.setDefaultValue(dtdAttribute.getDefaultValue());
834 }
835
836 return schemaAttribute;
837 }
838
839
840
841
842
843
844
845
846
847
848
849
850
851 public void marshalSchema(Schema schema, Writer writer)
852 throws IOException, SAXException {
853
854 SchemaWriter sw = new SchemaWriter(writer);
855 sw.write(schema);
856 }
857
858 }