1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 package org.exolab.castor.xml.dtd;
37
38 import java.io.FileInputStream;
39 import java.io.FileOutputStream;
40 import java.io.IOException;
41 import java.io.InputStreamReader;
42 import java.io.OutputStreamWriter;
43 import java.io.Reader;
44 import java.io.StringReader;
45 import java.io.Writer;
46 import java.util.Enumeration;
47 import java.util.HashMap;
48 import java.util.Iterator;
49 import java.util.Map;
50
51 import org.apache.commons.cli.CommandLine;
52 import org.apache.commons.cli.CommandLineParser;
53 import org.apache.commons.cli.DefaultParser;
54 import org.apache.commons.cli.HelpFormatter;
55 import org.apache.commons.cli.Option;
56 import org.apache.commons.cli.Options;
57 import org.apache.commons.logging.Log;
58 import org.apache.commons.logging.LogFactory;
59 import org.exolab.castor.xml.dtd.parser.DTDInitialParser;
60 import org.exolab.castor.xml.dtd.parser.DTDParser;
61 import org.exolab.castor.xml.dtd.parser.InputCharStream;
62 import org.exolab.castor.xml.dtd.parser.ParseException;
63 import org.exolab.castor.xml.dtd.parser.TokenMgrError;
64 import org.exolab.castor.xml.schema.Annotation;
65 import org.exolab.castor.xml.schema.AttributeDecl;
66 import org.exolab.castor.xml.schema.ComplexType;
67 import org.exolab.castor.xml.schema.ContentType;
68 import org.exolab.castor.xml.schema.Documentation;
69 import org.exolab.castor.xml.schema.ElementDecl;
70 import org.exolab.castor.xml.schema.Facet;
71 import org.exolab.castor.xml.schema.FacetFactory;
72 import org.exolab.castor.xml.schema.Group;
73 import org.exolab.castor.xml.schema.Order;
74 import org.exolab.castor.xml.schema.Particle;
75 import org.exolab.castor.xml.schema.Schema;
76 import org.exolab.castor.xml.schema.SchemaException;
77 import org.exolab.castor.xml.schema.SimpleType;
78 import org.exolab.castor.xml.schema.SimpleTypesFactory;
79 import org.exolab.castor.xml.schema.Wildcard;
80 import org.exolab.castor.xml.schema.writer.SchemaWriter;
81 import org.xml.sax.SAXException;
82
83
84
85
86
87
88
89
90
/**
 * Converts a DTD document into an XML schema, either from the command line (see
 * {@code main}) or programmatically through the {@code process} and
 * {@code convertDTDtoSchema} methods.
 */
91 public class Converter {
92
/** Logger used by {@code main} to report which command line options were found. */
93 private static final Log log = LogFactory.getLog(Converter.class);
94
/** Key under which {@code parseNamespace} stores the namespace prefix in its result map. */
95 public static final String NAME_SPACE_PREFIX_KEY = "nameSpacePrefixKey";
96
/** Key under which {@code parseNamespace} stores the namespace URI in its result map. */
97 public static final String NAME_SPACE_KEY = "nameSpaceKey";
98
/**
 * Default namespace prefix. {@code parseNamespace} falls back to the same literal value
 * ({@code "tns"}) when its argument carries no usable prefix.
 */
99 public static final String DEFAULT_NAME_SPACE_PREFIX = "tns";
100
/** Target namespace used when the caller does not supply one. */
101 public static final String DEFAULT_NAME_SPACE = "generated.castor.org";
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 public static void main(String args[])
126 throws IOException, DTDException, SchemaException, SAXException {
127
128 StringBuilder header = new StringBuilder().append('\n')
129 .append("Converts a DTD to an XML schema.\n\n")
130 .append(" <DTD>: Name of the input DTD file.\n")
131 .append(" <XSD>: Name of the output XML schema file.\n").append('\n').append("Options:");
132
133 Options options = new Options();
134
135 Option targetNamespace =
136 Option.builder("tns").desc("target namespace of the XML schema generated").required(false)
137 .longOpt("targetNamespace").hasArg().argName("[prefix:]uri").build();
138
139 Option xmlns = Option.builder("xmlns").desc("xml namespace declarations").required(false)
140 .hasArgs().valueSeparator(',').argName("[[prefix:]uri]*").build();
141
142 options.addOption(targetNamespace);
143 options.addOption(xmlns);
144 options.addOption("h", "help", false, "prints usage information");
145 options.addOption("e", "encoding", false, "character encoding");
146
147 CommandLineParser parser = new DefaultParser();
148 CommandLine line = null;
149 try {
150 line = parser.parse(options, args);
151 } catch (org.apache.commons.cli.ParseException e) {
152 System.err.println("Parsing failed. Reason: " + e.getMessage());
153 }
154
155 if (args.length < 2 || line.hasOption("help")) {
156 HelpFormatter formatter = new HelpFormatter();
157 formatter.printHelp("org.exolab.castor.xml.dtd.Converter <DTD> <XSD>", header.toString(),
158 options, "");
159 return;
160 }
161
162 String encoding = "US-ASCII";
163 String targetNameSpace = DEFAULT_NAME_SPACE;
164 Map<String, String> nameSpaceMap = new HashMap<String, String>();
165
166 if (line.hasOption("tns")) {
167 log.info("Found option -tns ...");
168 Map<String, String> nameSpaceMapTemp = parseNamespace(line.getOptionValue("tns"));
169 targetNameSpace = nameSpaceMapTemp.get(NAME_SPACE_KEY);
170 nameSpaceMap.put(nameSpaceMapTemp.get(NAME_SPACE_PREFIX_KEY), targetNameSpace);
171 }
172
173 if (line.hasOption("xmlns")) {
174 log.info("Found option -xmlns ...");
175 Map<String, String> nameSpaceMapTemp = parseNamespace(line.getOptionValue("xmlns"));
176 nameSpaceMap.put(nameSpaceMapTemp.get(NAME_SPACE_PREFIX_KEY),
177 nameSpaceMapTemp.get(NAME_SPACE_KEY));
178 }
179
180 if (line.hasOption("encoding")) {
181 log.info("Found option -encoding ...");
182 String encodingValue = line.getOptionValue("encoding");
183 if (encodingValue.equalsIgnoreCase("ascii") || args[2].equalsIgnoreCase("us-ascii")) {
184 encoding = "US-ASCII";
185 } else if (encodingValue.equalsIgnoreCase("utf-8")) {
186 encoding = "UTF-8";
187 } else if (encodingValue.equalsIgnoreCase("utf-16")) {
188 encoding = "UTF-16";
189 } else {
190 encoding = encodingValue.toUpperCase();
191 }
192 }
193
194 String inFile = args[0];
195 String outFile = args[1];
196
197 Converter convertor = new Converter();
198 convertor.process(inFile, outFile, encoding, targetNameSpace, nameSpaceMap);
199 }
200
201
202
203
204
205
206
207
208
209
210
211
212 public void process(final String dtdFile, final String schemaFile, final String encoding,
213 final String targetNamespace, final Map<String, String> namespaces)
214 throws SchemaException, DTDException, IOException, SAXException {
215
216
217 FileInputStream inputStream = new FileInputStream(dtdFile);
218
219
220 InputStreamReader reader = new InputStreamReader(inputStream, encoding);
221
222
223 FileOutputStream outputStream = new FileOutputStream(schemaFile);
224
225
226 OutputStreamWriter writer = new OutputStreamWriter(outputStream, encoding);
227
228 process(reader, writer, encoding, targetNamespace, namespaces);
229 }
230
231
232
233
234
235
236
237
238
239
240
241
242 public void process(final Reader in, final Writer out, final String encoding,
243 final String targetNamespace, final Map<String, String> namespaces)
244 throws SchemaException, DTDException, IOException, SAXException {
245
246 try {
247
248 convertDTDtoSchema(in, out, targetNamespace, namespaces);
249 } finally {
250 in.close();
251 out.close();
252 }
253 }
254
255 protected static Map<String, String> parseNamespace(final String nameSpaceArg) {
256
257 Map<String, String> nameSpaceMap = new HashMap<String, String>();
258
259 if ("".equals(nameSpaceArg.substring("-tns=".length()))) {
260 throw new RuntimeException("name space argument is emty, Spaces after '='? ");
261 }
262
263 String[] tnsToken = nameSpaceArg.substring("-tns=".length()).split(":", 2);
264
265 if (isNameSpacePrefix(tnsToken[0])) {
266 nameSpaceMap.put(NAME_SPACE_PREFIX_KEY, tnsToken[0]);
267 nameSpaceMap.put(NAME_SPACE_KEY, tnsToken[1]);
268 } else {
269 nameSpaceMap.put(NAME_SPACE_KEY, nameSpaceArg.substring("-tns=".length()));
270 nameSpaceMap.put(NAME_SPACE_PREFIX_KEY, "tns");
271 }
272
273 return nameSpaceMap;
274
275 }
276
277 protected static boolean isNameSpacePrefix(String nameSpacePrefix2test) {
278
279 if (!nameSpacePrefix2test.matches("[a-z]*")) {
280 return false;
281 }
282
283 if (nameSpacePrefix2test.matches("https?")) {
284 return false;
285 }
286
287 return true;
288
289 }
290
291
292
293
294
295
296
297
298
299
300
301
302
303 public void convertDTDtoSchema(Reader reader, Writer writer, String targetNameSpace,
304 Map<String, String> nameSpaceMap)
305 throws DTDException, SchemaException, IOException, SAXException {
306
307
308 DTDdocument dtd = parseDTD(reader);
309
310
311 Schema schema = convertDTDObjectToSchemaObject(dtd, targetNameSpace, nameSpaceMap);
312
313
314 marshalSchema(schema, writer);
315
316 }
317
318
319
320
321
322
323
324
325
326
327
328
329 public void convertDTDtoSchema(Reader reader, Writer writer)
330 throws DTDException, SchemaException, IOException, SAXException {
331
332 String targetNameSpace = DEFAULT_NAME_SPACE;
333 Map<String, String> nameSpaceMap = new HashMap<String, String>();
334
335 convertDTDtoSchema(reader, writer, targetNameSpace, nameSpaceMap);
336 }
337
338
339
340
341
342
343
344
345
346
347
348
349
350 public DTDdocument parseDTD(Reader reader) throws DTDException {
351 try {
352
353
354 InputCharStream charStream = new InputCharStream(reader);
355
356
357 DTDInitialParser initialParser = new DTDInitialParser(charStream);
358
359
360
361 String intermedResult = initialParser.Input();
362
363
364 try (StringReader strReader = new StringReader(intermedResult)) {
365
366 charStream = new InputCharStream(strReader);
367
368
369 DTDParser parser = new DTDParser(charStream);
370
371
372
373 DTDdocument dtd = parser.Input();
374
375
376 return dtd;
377 }
378 } catch (TokenMgrError tme) {
379 String msg = tme.getMessage();
380 throw new DTDException("TokenMgrError" + (msg == null ? "" : ": " + msg));
381 } catch (ParseException pe) {
382 String msg = pe.getMessage();
383 throw new DTDException("ParseException" + (msg == null ? "" : ": " + msg));
384 }
385 }
386
387
388
389
390
391
392
393
394
395
396
397
398
399 public Schema convertDTDObjectToSchemaObject(DTDdocument dtd, String targetNamespace,
400 Map<String, String> nameSpaceMap) throws DTDException, SchemaException {
401
402 Schema schema = new Schema();
403
404 String name = dtd.getName();
405 if (name != null && !name.equals("")) {
406 schema.setId(name);
407 }
408
409 schema.setTargetNamespace(targetNamespace);
410
411 for (Map.Entry<String, String> entry : nameSpaceMap.entrySet()) {
412 schema.addNamespace(entry.getKey(), entry.getValue());
413 }
414
415
416 Enumeration<Notation> dtdNotations = dtd.getNotations();
417
418 while (dtdNotations.hasMoreElements()) {
419 dtdNotations.nextElement();
420
421
422 }
423
424
425
426
427
428
429 Enumeration<GeneralEntity> dtdGeneralEntities = dtd.getGeneralEntities();
430 if (dtdGeneralEntities.hasMoreElements()) {
431 Annotation annotation = new Annotation();
432
433 while (dtdGeneralEntities.hasMoreElements()) {
434 GeneralEntity ge = dtdGeneralEntities.nextElement();
435 Documentation documentation = new Documentation();
436
437 String text = "General Entity Declaration";
438 documentation.add(text);
439 documentation.add(ge);
440 annotation.addDocumentation(documentation);
441 }
442
443 schema.addAnnotation(annotation);
444 }
445
446
447
448 Enumeration<Element> dtdElements = dtd.getElements();
449 while (dtdElements.hasMoreElements()) {
450 Element dtdElement = dtdElements.nextElement();
451 ElementDecl schemaElement = convertDTDElementToSchemaElement(dtdElement, schema);
452 schema.addElementDecl(schemaElement);
453 }
454
455 return schema;
456
457 }
458
459
460
461
462
463
464
465
466
467
468
469
470 public ElementDecl convertDTDElementToSchemaElement(Element dtdElement, Schema schema)
471 throws DTDException, SchemaException {
472
473 String name = dtdElement.getName();
474 if (name == null || name.equals("")) {
475 String err = "DTD to Schema converter: a DTD element has no name.";
476 throw new DTDException(err);
477 }
478 ElementDecl schemaElement = new ElementDecl(schema, name);
479
480
481 ComplexType complexType = schema.createComplexType();
482 ContentType contentType = null;
483 Group group = null;
484 Iterator mixedChildrenIterator = null;
485 String elementRef = null;
486 ElementDecl elem = null;
487
488 if (dtdElement.isEmptyContent()) {
489
490
491 contentType = ContentType.elemOnly;
492
493
494 } else if (dtdElement.isAnyContent()) {
495
496
497 contentType = ContentType.mixed;
498
499
500 group = new Group();
501 group.setOrder(Order.sequence);
502 group.setMinOccurs(0);
503 group.setMaxOccurs(-1);
504 Wildcard any = new Wildcard(group);
505 group.addWildcard(any);
506 complexType.addGroup(group);
507
508 } else if (dtdElement.isElemOnlyContent()) {
509
510
511 contentType = ContentType.elemOnly;
512
513
514 ContentParticle dtdContent = dtdElement.getContent();
515 if (dtdContent == null) {
516 String err = "DTD to Schema converter: element \"" + dtdElement.getName();
517 err += "\" has no content.";
518 throw new DTDException(err);
519 }
520
521 Particle content = null;
522 try {
523 content = convertContentParticle(dtdContent, schema);
524 } catch (DTDException e) {
525 String err = "DTD to Schema converter: content of DTD element \"" + dtdElement.getName();
526 err += "\", represented by a Content Particle, is malformed.";
527 throw new DTDException(err);
528 }
529
530 if (content instanceof ElementDecl) {
531 group = new Group();
532 group.setOrder(Order.sequence);
533 group.addElementDecl((ElementDecl) content);
534 complexType.addGroup(group);
535 } else {
536 complexType.addGroup((Group) content);
537 }
538
539 } else if (dtdElement.isMixedContent()) {
540
541
542 contentType = ContentType.mixed;
543
544
545 mixedChildrenIterator = dtdElement.getMixedContentChildren();
546 if ((mixedChildrenIterator != null) && (mixedChildrenIterator.hasNext())) {
547 group = new Group();
548 group.setOrder(Order.choice);
549 group.setMinOccurs(0);
550 group.setMaxOccurs(-1);
551 while (mixedChildrenIterator.hasNext()) {
552 elementRef = (String) mixedChildrenIterator.next();
553 elem = new ElementDecl(schema);
554 elem.setReferenceName(elementRef);
555 group.addElementDecl(elem);
556 }
557 complexType.addGroup(group);
558 }
559
560 } else {
561 String err = "DTD to Schema converter: content type of DTD element \"" + dtdElement.getName()
562 + "\" has not been specified.";
563 throw new DTDException(err);
564 }
565 complexType.setContentType(contentType);
566
567
568
569 Enumeration<Attribute> dtdAttributes = dtdElement.getAttributes();
570 while (dtdAttributes.hasMoreElements()) {
571 Attribute dtdAttribute = dtdAttributes.nextElement();
572 AttributeDecl schemaAttribute = convertAttribute(dtdAttribute, schema);
573 complexType.addAttributeDecl(schemaAttribute);
574 }
575
576
577 schemaElement.setType(complexType);
578 return schemaElement;
579
580 }
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596 public Particle convertContentParticle(ContentParticle dtdContent, Schema schema)
597 throws DTDException, SchemaException {
598
599 Particle returnValue;
600
601 if (dtdContent.isReferenceType()) {
602
603 ElementDecl elem = new ElementDecl(schema);
604 elem.setReferenceName(dtdContent.getReference());
605 returnValue = elem;
606
607 } else if (dtdContent.isSeqType() || dtdContent.isChoiceType()) {
608
609 Group group = new Group();
610 if (dtdContent.isSeqType())
611 group.setOrder(Order.sequence);
612 else
613 group.setOrder(Order.choice);
614
615 Enumeration<ContentParticle> children = dtdContent.getChildren();
616 while (children.hasMoreElements()) {
617 ContentParticle child = children.nextElement();
618 Particle contentParticle = convertContentParticle(child, schema);
619
620 if (contentParticle instanceof ElementDecl) {
621 group.addElementDecl((ElementDecl) contentParticle);
622 } else {
623 group.addGroup((Group) contentParticle);
624 }
625 }
626
627 returnValue = group;
628
629 } else {
630 throw new DTDException();
631 }
632
633 if (dtdContent.isOneOccurance()) {
634 returnValue.setMinOccurs(1);
635 returnValue.setMaxOccurs(1);
636 } else if (dtdContent.isOneOrMoreOccurances()) {
637 returnValue.setMinOccurs(1);
638 returnValue.setMaxOccurs(-1);
639 } else if (dtdContent.isZeroOrMoreOccurances()) {
640 returnValue.setMinOccurs(0);
641 returnValue.setMaxOccurs(-1);
642 } else if (dtdContent.isZeroOrOneOccurance()) {
643 returnValue.setMinOccurs(0);
644 returnValue.setMaxOccurs(1);
645 } else {
646
647
648 }
649
650 return returnValue;
651
652 }
653
654
655
656
657
658
659
660
661
662 public AttributeDecl convertAttribute(Attribute dtdAttribute, Schema schema) throws DTDException {
663
664 AttributeDecl schemaAttribute = new AttributeDecl(schema, dtdAttribute.getName());
665
666 SimpleType type = null;
667
668 if (dtdAttribute.isStringType()) {
669 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.STRING_TYPE));
670 } else if (dtdAttribute.isIDType()) {
671 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.ID_TYPE));
672 } else if (dtdAttribute.isIDREFType()) {
673 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.IDREF_TYPE));
674 } else if (dtdAttribute.isIDREFSType()) {
675 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.IDREFS_TYPE));
676 } else if (dtdAttribute.isENTITYType()) {
677 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.ENTITY_TYPE));
678 } else if (dtdAttribute.isENTITIESType()) {
679 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.ENTITIES_TYPE));
680 } else if (dtdAttribute.isNMTOKENType()) {
681 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.NMTOKEN_TYPE));
682 } else if (dtdAttribute.isNMTOKENSType()) {
683 type = schema.getSimpleType(schema.getBuiltInTypeName(SimpleTypesFactory.NMTOKENS_TYPE));
684 } else if (dtdAttribute.isNOTATIONType()) {
685 type = schema.createSimpleType(null,
686 schema.getBuiltInTypeName(SimpleTypesFactory.NOTATION_TYPE), "restriction");
687 Iterator<String> values = dtdAttribute.getValues();
688 FacetFactory facetFactory = FacetFactory.getInstance();
689 while (values.hasNext()) {
690 Facet facet = facetFactory.createFacet(Facet.ENUMERATION, values.next());
691 facet.setOwningType(type);
692 type.addFacet(facet);
693 }
694
695 } else if (dtdAttribute.isEnumerationType()) {
696 type = schema.createSimpleType(null,
697 schema.getBuiltInTypeName(SimpleTypesFactory.NMTOKEN_TYPE), "restriction");
698 Iterator<String> values = dtdAttribute.getValues();
699 FacetFactory facetFactory = FacetFactory.getInstance();
700 while (values.hasNext()) {
701 Facet facet = facetFactory.createFacet(Facet.ENUMERATION, values.next());
702 facet.setOwningType(type);
703 type.addFacet(facet);
704 }
705 } else {
706 String err = "DTD to Schema converter: DTD attribute \"" + dtdAttribute.getName()
707 + "\" has unspecified type.";
708 throw new DTDException(err);
709 }
710
711 schemaAttribute.setSimpleType(type);
712
713 if (dtdAttribute.isREQUIRED()) {
714 schemaAttribute.setUse(AttributeDecl.USE_REQUIRED);
715 } else if (dtdAttribute.isIMPLIED()) {
716 schemaAttribute.setUse(AttributeDecl.USE_OPTIONAL);
717 } else if (dtdAttribute.isFIXED()) {
718
719 } else {
720 schemaAttribute.setDefaultValue(dtdAttribute.getDefaultValue());
721 }
722
723 return schemaAttribute;
724 }
725
726
727
728
729
730
731
732
733
734 public void marshalSchema(Schema schema, Writer writer) throws IOException, SAXException {
735
736 SchemaWriter sw = new SchemaWriter(writer);
737 sw.write(schema);
738 }
739
740 }