1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 package com.github.davidmoten.aws.lw.client.xml;
32
33 import java.io.ByteArrayOutputStream;
34 import java.io.IOException;
35 import java.io.OutputStreamWriter;
36 import java.io.Reader;
37 import java.io.StringReader;
38 import java.io.UncheckedIOException;
39 import java.io.Writer;
40 import java.nio.charset.StandardCharsets;
41 import java.util.ArrayList;
42 import java.util.Collections;
43 import java.util.Enumeration;
44 import java.util.HashMap;
45 import java.util.List;
46 import java.util.Map;
47 import java.util.NoSuchElementException;
48 import java.util.Set;
49 import java.util.stream.Collectors;
50
51 import com.github.davidmoten.aws.lw.client.internal.util.Preconditions;
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 public final class XmlElement {
89
90 private List<XmlElement> children;
91 private Map<String, String> attributes;
92 private String name;
93
94
95
96
97 private String content;
98
99 private static final Map<String, char[]> ENTITIES = createEntities();
100
101
102
103
104
105 private int lineNr;
106
107
108
109
110
111 private final boolean ignoreLeadingAndTrailingWhitespace;
112
113
114
115
116
117
118 private char charReadTooMuch;
119
120
121
122
123 private Reader reader;
124
125
126
127
128 private int parserLineNr;
129
/**
 * Creates an unnamed element with empty content, no attributes and no
 * children.
 *
 * @param ignoreLeadingAndTrailingWhitespace whether text content is trimmed
 *        when it is stored during parsing
 */
private XmlElement(boolean ignoreLeadingAndTrailingWhitespace) {
    this.lineNr = 0;
    this.name = null;
    this.content = "";
    this.children = new ArrayList<>();
    this.attributes = new HashMap<>();
    this.ignoreLeadingAndTrailingWhitespace = ignoreLeadingAndTrailingWhitespace;
}
138
139 public void addChild(XmlElement child) {
140 children.add(child);
141 }
142
143 public int countChildren() {
144 return children.size();
145 }
146
147 public boolean hasChildren() {
148 return !children.isEmpty();
149 }
150
151 public Set<String> attributeNames() {
152 return attributes.keySet();
153 }
154
155 public List<XmlElement> children() {
156 return children;
157 }
158
159 public List<XmlElement> childrenWithName(String name) {
160 return children.stream().filter(x -> name.equals(x.name())).collect(Collectors.toList());
161 }
162
163 public XmlElement firstChild() {
164 return children.get(0);
165 }
166
167 public XmlElement child(int index) {
168 return children.get(index);
169 }
170
171 public XmlElement child(String... names) {
172 XmlElement x = this;
173 XmlElement y = null;
174 for (String name : names) {
175 for (XmlElement child : x.children) {
176 if (child.name().equals(name)) {
177 y = child;
178 }
179 }
180 if (y == null) {
181 throw new NoSuchElementException("child not found with name: " + name);
182 } else {
183 x = y;
184 }
185 }
186 return y;
187 }
188
189 public String content(String... names) {
190 return child(names).content();
191 }
192
193
194
195
196
197 public String content() {
198 return this.content;
199 }
200
201
202
203
204
205 public int lineNumber() {
206 return this.lineNr;
207 }
208
209
210
211
212
213
214
215 public String attribute(String name) {
216 return this.attribute(name, null);
217 }
218
219
220
221
222
223
224
225
226 public String attribute(String name, String defaultValue) {
227 Preconditions.checkNotNull(name);
228 return this.attributes.getOrDefault(name, defaultValue);
229 }
230
231
232
233
234 public String name() {
235 return this.name;
236 }
237
238 public static XmlElement parse(Reader reader) throws XmlParseException, IOException {
239 return parse(reader, true);
240 }
241
242 public static XmlElement parse(Reader reader, boolean ignoreLeadingAndTrailingWhitespace)
243 throws IOException, XmlParseException {
244 Preconditions.checkNotNull(reader);
245 XmlElement x = new XmlElement(ignoreLeadingAndTrailingWhitespace);
246 x.parseFromReader(reader);
247 return x;
248 }
249
250 private void parseFromReader(Reader reader) throws IOException, XmlParseException {
251 Preconditions.checkNotNull(reader);
252 this.name = null;
253 this.content = "";
254 this.attributes = new HashMap<>();
255 this.children = new ArrayList<>();
256 this.charReadTooMuch = '\0';
257 this.reader = reader;
258 this.parserLineNr = 1;
259
260 for (;;) {
261 char ch = this.scanWhitespace();
262
263 if (ch != '<') {
264 throw this.createUnexpectedInputException("<");
265 }
266
267 ch = this.readChar();
268
269 if ((ch == '!') || (ch == '?')) {
270 this.skipSpecialTag(0);
271 } else {
272 this.unreadChar(ch);
273 this.scanElement(this);
274 return;
275 }
276 }
277 }
278
279 public static XmlElement parse(String string) throws XmlParseException {
280 return parse(string, true);
281 }
282
283 public static XmlElement parse(String string, boolean ignoreLeadingAndTrailingWhitespace)
284 throws XmlParseException {
285 Preconditions.checkNotNull(string);
286 return parseUnchecked(new StringReader(string), ignoreLeadingAndTrailingWhitespace);
287 }
288
289
290 static XmlElement parseUnchecked(Reader reader, boolean ignoreLeadingAndTrailingWhitespace)
291 throws XmlParseException {
292 try {
293 return parse(reader, ignoreLeadingAndTrailingWhitespace);
294 } catch (IOException e) {
295 throw new UncheckedIOException(e);
296 }
297 }
298
299 private XmlElement createAnotherElement() {
300 return new XmlElement(this.ignoreLeadingAndTrailingWhitespace);
301 }
302
303 public String toString() {
304 ByteArrayOutputStream out = new ByteArrayOutputStream();
305 OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8);
306 writeUnchecked(writer);
307 return new String(out.toByteArray(), StandardCharsets.UTF_8);
308 }
309
310
311 void writeUnchecked(Writer writer) {
312 try {
313 this.write(writer);
314 } catch (IOException e) {
315 throw new UncheckedIOException(e);
316 } finally {
317 try {
318 writer.close();
319 } catch (IOException e) {
320 throw new UncheckedIOException(e);
321 }
322 }
323 }
324
325 public void write(Writer writer) throws IOException {
326 Preconditions.checkNotNull(writer);
327 Preconditions.checkNotNull(name);
328 Preconditions.checkNotNull(content);
329
330
331
332
333 writer.write('<');
334 writer.write(this.name);
335 if (!this.attributes.isEmpty()) {
336 Enumeration<String> en = Collections.enumeration(this.attributes.keySet());
337 while (en.hasMoreElements()) {
338 writer.write(' ');
339 String key = (String) en.nextElement();
340 String value = (String) this.attributes.get(key);
341 writer.write(key);
342 writer.write('=');
343 writer.write('"');
344 writeEncoded(writer, value);
345 writer.write('"');
346 }
347 }
348 if (!content.isEmpty()) {
349 writer.write('>');
350 writeEncoded(writer, this.content);
351 writer.write('<');
352 writer.write('/');
353 writer.write(this.name);
354 writer.write('>');
355 } else if (children.isEmpty()) {
356 writer.write('/');
357 writer.write('>');
358 } else {
359 writer.write('>');
360 for (XmlElement child : children) {
361 child.write(writer);
362 }
363 writer.write('<');
364 writer.write('/');
365 writer.write(this.name);
366 writer.write('>');
367 }
368 }
369
370 private static void writeEncoded(Writer writer, String str) throws IOException {
371 for (int i = 0; i < str.length(); i += 1) {
372 char ch = str.charAt(i);
373 switch (ch) {
374 case '<':
375 writer.write('&');
376 writer.write('l');
377 writer.write('t');
378 writer.write(';');
379 break;
380 case '>':
381 writer.write('&');
382 writer.write('g');
383 writer.write('t');
384 writer.write(';');
385 break;
386 case '&':
387 writer.write('&');
388 writer.write('a');
389 writer.write('m');
390 writer.write('p');
391 writer.write(';');
392 break;
393 case '"':
394 writer.write('&');
395 writer.write('q');
396 writer.write('u');
397 writer.write('o');
398 writer.write('t');
399 writer.write(';');
400 break;
401 case '\'':
402 writer.write('&');
403 writer.write('a');
404 writer.write('p');
405 writer.write('o');
406 writer.write('s');
407 writer.write(';');
408 break;
409 default:
410 int unicode = (int) ch;
411 if ((unicode < 32) || (unicode > 126)) {
412 writer.write('&');
413 writer.write('#');
414 writer.write('x');
415 writer.write(Integer.toString(unicode, 16));
416 writer.write(';');
417 } else {
418 writer.write(ch);
419 }
420 }
421 }
422 }
423
424
425
426
427
428
429
430 private void scanIdentifier(StringBuilder result) throws IOException {
431 for (;;) {
432 char ch = this.readChar();
433 if (!isValidIdentifierCharacter(ch)) {
434 this.unreadChar(ch);
435 return;
436 }
437 result.append(ch);
438 }
439 }
440
441
442 static boolean isValidIdentifierCharacter(char ch) {
443 return ((ch >= 'A') && (ch <= 'Z')) ||
444 ((ch >= 'a') && (ch <= 'z')) ||
445 ((ch >= '0') && (ch <= '9')) ||
446 (ch == '_') ||
447 (ch == '.') ||
448 (ch == ':') ||
449 (ch == '-') ||
450 (ch > '\u007E');
451 }
452
453
454
455
456
457
458 private char scanWhitespace() throws IOException {
459 for (;;) {
460 char ch = this.readChar();
461 switch (ch) {
462 case ' ':
463 case '\t':
464 case '\n':
465 case '\r':
466 break;
467 default:
468 return ch;
469 }
470 }
471 }
472
473
474
475
476
477
478
479 private char scanWhitespace(StringBuilder result) throws IOException {
480 for (;;) {
481 char ch = this.readChar();
482 switch (ch) {
483 case ' ':
484 case '\t':
485 case '\n':
486 result.append(ch);
487 case '\r':
488 break;
489 default:
490 return ch;
491 }
492 }
493 }
494
495
496
497
498
499 private void scanString(StringBuilder string) throws IOException {
500 char delimiter = this.readChar();
501 if ((delimiter != '\'') && (delimiter != '"')) {
502 throw this.createUnexpectedInputException("' or \"");
503 }
504 for (;;) {
505 char ch = this.readChar();
506 if (ch == delimiter) {
507 return;
508 } else if (ch == '&') {
509 this.resolveEntity(string);
510 } else {
511 string.append(ch);
512 }
513 }
514 }
515
516
517
518
519
520 private void scanPCData(StringBuilder data) throws IOException {
521 for (;;) {
522 char ch = this.readChar();
523 if (ch == '<') {
524
525
526
527 ch = this.readChar();
528 if (ch == '!') {
529 this.checkCDATA(data);
530 } else {
531 this.unreadChar(ch);
532 return;
533 }
534 } else if (ch == '&') {
535 this.resolveEntity(data);
536 } else {
537 data.append(ch);
538 }
539 }
540 }
541
542
543
544
545
546 private boolean checkCDATA(StringBuilder buf) throws IOException {
547 char ch = this.readChar();
548 if (ch != '[') {
549 this.unreadChar(ch);
550 this.skipSpecialTag(0);
551 return false;
552 } else if (!this.checkLiteral("CDATA[")) {
553 this.skipSpecialTag(1);
554 return false;
555 } else {
556 int delimiterCharsSkipped = 0;
557 while (delimiterCharsSkipped < 3) {
558 ch = this.readChar();
559 switch (ch) {
560 case ']':
561 if (delimiterCharsSkipped < 2) {
562 delimiterCharsSkipped += 1;
563 } else {
564 buf.append(']');
565 buf.append(']');
566 delimiterCharsSkipped = 0;
567 }
568 break;
569 case '>':
570 if (delimiterCharsSkipped < 2) {
571 for (int i = 0; i < delimiterCharsSkipped; i++) {
572 buf.append(']');
573 }
574 delimiterCharsSkipped = 0;
575 buf.append('>');
576 } else {
577 delimiterCharsSkipped = 3;
578 }
579 break;
580 default:
581 for (int i = 0; i < delimiterCharsSkipped; i += 1) {
582 buf.append(']');
583 }
584 buf.append(ch);
585 delimiterCharsSkipped = 0;
586 }
587 }
588 return true;
589 }
590 }
591
592
593
594
595 private void skipComment() throws IOException {
596 int dashesToRead = 2;
597 while (dashesToRead > 0) {
598 char ch = this.readChar();
599 if (ch == '-') {
600 dashesToRead -= 1;
601 } else {
602 dashesToRead = 2;
603 }
604 }
605 if (this.readChar() != '>') {
606 throw this.createUnexpectedInputException(">");
607 }
608 }
609
610
611
612
613
614
615
616 private void skipSpecialTag(int bracketLevel) throws IOException {
617 int tagLevel = 1;
618 char stringDelimiter = '\0';
619 if (bracketLevel == 0) {
620 char ch = this.readChar();
621 if (ch == '[') {
622 bracketLevel += 1;
623 } else if (ch == '-') {
624 ch = this.readChar();
625 if (ch == '[') {
626 bracketLevel += 1;
627 } else if (ch == ']') {
628 bracketLevel -= 1;
629 } else if (ch == '-') {
630 this.skipComment();
631 return;
632 }
633 }
634 }
635 while (tagLevel > 0) {
636 char ch = this.readChar();
637 if (stringDelimiter == '\0') {
638 if ((ch == '"') || (ch == '\'')) {
639 stringDelimiter = ch;
640 } else if (bracketLevel <= 0) {
641 if (ch == '<') {
642 tagLevel += 1;
643 } else if (ch == '>') {
644 tagLevel -= 1;
645 }
646 }
647 if (ch == '[') {
648 bracketLevel += 1;
649 } else if (ch == ']') {
650 bracketLevel -= 1;
651 }
652 } else {
653 if (ch == stringDelimiter) {
654 stringDelimiter = '\0';
655 }
656 }
657 }
658 }
659
660
661
662
663
664
665
666 private boolean checkLiteral(String literal) throws IOException {
667 int length = literal.length();
668 for (int i = 0; i < length; i += 1) {
669 if (this.readChar() != literal.charAt(i)) {
670 return false;
671 }
672 }
673 return true;
674 }
675
676
677
678
679 private char readChar() throws IOException {
680 if (this.charReadTooMuch != '\0') {
681 char ch = this.charReadTooMuch;
682 this.charReadTooMuch = '\0';
683 return ch;
684 } else {
685 int i = this.reader.read();
686 if (i < 0) {
687 throw this.createExceptionUnexpectedEndOfData();
688 } else if (i == 10) {
689 this.parserLineNr += 1;
690 return '\n';
691 } else {
692 return (char) i;
693 }
694 }
695 }
696
697 private void scanElement(XmlElement elt) throws IOException {
698 StringBuilder buf = new StringBuilder();
699 this.scanIdentifier(buf);
700 String name = buf.toString();
701 elt.name = name;
702 char ch = this.scanWhitespace();
703 while ((ch != '>') && (ch != '/')) {
704 buf.setLength(0);
705 this.unreadChar(ch);
706 this.scanIdentifier(buf);
707 String key = buf.toString();
708 ch = this.scanWhitespace();
709 if (ch != '=') {
710 throw this.createUnexpectedInputException("=");
711 }
712 this.unreadChar(this.scanWhitespace());
713 buf.setLength(0);
714 this.scanString(buf);
715 elt.attributes.put(key, buf.toString());
716 ch = this.scanWhitespace();
717 }
718 if (ch == '/') {
719 ch = this.readChar();
720 if (ch != '>') {
721 throw this.createUnexpectedInputException(">");
722 }
723 return;
724 }
725 buf.setLength(0);
726 ch = this.scanWhitespace(buf);
727 if (ch != '<') {
728 this.unreadChar(ch);
729 this.scanPCData(buf);
730 } else {
731 for (;;) {
732 ch = this.readChar();
733 if (ch == '!') {
734 if (this.checkCDATA(buf)) {
735 this.scanPCData(buf);
736 break;
737 } else {
738 ch = this.scanWhitespace(buf);
739 if (ch != '<') {
740 this.unreadChar(ch);
741 this.scanPCData(buf);
742 break;
743 }
744 }
745 } else {
746 if ((ch != '/') || this.ignoreLeadingAndTrailingWhitespace) {
747 buf.setLength(0);
748 }
749 if (ch == '/') {
750 this.unreadChar(ch);
751 }
752 break;
753 }
754 }
755 }
756 if (buf.length() == 0) {
757 while (ch != '/') {
758 if (ch == '!') {
759 ch = this.readChar();
760 if (ch != '-') {
761 throw this.createUnexpectedInputException("Comment or Element");
762 }
763 ch = this.readChar();
764 if (ch != '-') {
765 throw this.createUnexpectedInputException("Comment or Element");
766 }
767 this.skipComment();
768 } else {
769 this.unreadChar(ch);
770 XmlElement child = this.createAnotherElement();
771 this.scanElement(child);
772 elt.addChild(child);
773 }
774 ch = this.scanWhitespace();
775 if (ch != '<') {
776 throw this.createUnexpectedInputException("<");
777 }
778 ch = this.readChar();
779 }
780 this.unreadChar(ch);
781 } else {
782 if (this.ignoreLeadingAndTrailingWhitespace) {
783 elt.content = buf.toString().trim();
784 } else {
785 elt.content = buf.toString();
786 }
787 }
788 ch = this.readChar();
789 if (ch != '/') {
790 throw this.createUnexpectedInputException("/");
791 }
792 this.unreadChar(this.scanWhitespace());
793
794 if (!this.checkLiteral(name)) {
795 throw this.createUnexpectedInputException(name);
796 }
797 if (this.scanWhitespace() != '>') {
798 throw this.createUnexpectedInputException(">");
799 }
800 }
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822 private void resolveEntity(StringBuilder buf) throws IOException {
823 char ch = '\0';
824 StringBuilder keyBuf = new StringBuilder();
825 for (;;) {
826 ch = this.readChar();
827 if (ch == ';') {
828 break;
829 }
830 keyBuf.append(ch);
831 }
832 String key = keyBuf.toString();
833 if (key.charAt(0) == '#') {
834 try {
835 if (key.charAt(1) == 'x') {
836 ch = (char) Integer.parseInt(key.substring(2), 16);
837 } else {
838 ch = (char) Integer.parseInt(key.substring(1), 10);
839 }
840 } catch (NumberFormatException e) {
841 throw this.createExceptionUnknownEntity(key);
842 }
843 buf.append(ch);
844 } else {
845 char[] value = (char[]) ENTITIES.get(key);
846 if (value == null) {
847 throw this.createExceptionUnknownEntity(key);
848 }
849 buf.append(value);
850 }
851 }
852
853
854
855
856
857
858 private void unreadChar(char ch) {
859 this.charReadTooMuch = ch;
860 }
861
862
863
864
865
866 private XmlParseException createExceptionUnexpectedEndOfData() {
867 String msg = "Unexpected end of data reached";
868 return new XmlParseException(this.name(), this.parserLineNr, msg);
869 }
870
871
872
873
874
875
876
877
878 private XmlParseException createUnexpectedInputException(String charSet) {
879 String msg = "Expected: " + charSet;
880 return new XmlParseException(this.name(), this.parserLineNr, msg);
881 }
882
883 private XmlParseException createExceptionUnknownEntity(String name) {
884 String msg = "Unknown or invalid entity: &" + name + ";";
885 return new XmlParseException(this.name(), this.parserLineNr, msg);
886 }
887
888 private static Map<String, char[]> createEntities() {
889 Map<String, char[]> map = new HashMap<>();
890 map.put("amp", new char[] {'&'});
891 map.put("quot", new char[] {'"'});
892 map.put("apos", new char[] {'\''});
893 map.put("lt", new char[] {'<'});
894 map.put("gt", new char[] {'>'});
895 return map;
896 }
897
898 }