1 package org.codehaus.groovy.sandbox.util;
2 import groovy.lang.Closure;
3 import groovy.lang.GroovyObject;
4 import groovy.lang.GroovyObjectSupport;
5 import groovy.lang.Writable;
6
7 import java.io.File;
8 import java.io.FileInputStream;
9 import java.io.IOException;
10 import java.io.InputStream;
11 import java.io.Reader;
12 import java.io.StringReader;
13 import java.io.Writer;
14 import java.security.AccessController;
15 import java.security.PrivilegedActionException;
16 import java.security.PrivilegedExceptionAction;
17 import java.util.HashMap;
18 import java.util.Iterator;
19 import java.util.LinkedList;
20 import java.util.List;
21 import java.util.Map;
22
23 import javax.xml.parsers.ParserConfigurationException;
24 import javax.xml.parsers.SAXParser;
25 import javax.xml.parsers.SAXParserFactory;
26
27 import org.codehaus.groovy.sandbox.markup.Buildable;
28 import org.xml.sax.Attributes;
29 import org.xml.sax.InputSource;
30 import org.xml.sax.SAXException;
31 import org.xml.sax.XMLReader;
32 import org.xml.sax.helpers.DefaultHandler;
33
34
35 public class XmlSlurper extends DefaultHandler {
36 private final XMLReader reader;
37 private List result = null;
38 private List body = null;
39 private final StringBuffer charBuffer = new StringBuffer();
40
41 public XmlSlurper() throws ParserConfigurationException, SAXException {
42 this(false, true);
43 }
44
45 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
46 SAXParserFactory factory = null;
47
48 try {
49 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
50 public Object run() throws ParserConfigurationException {
51 return SAXParserFactory.newInstance();
52 }
53 });
54 } catch (final PrivilegedActionException pae) {
55 final Exception e = pae.getException();
56
57 if (e instanceof ParserConfigurationException) {
58 throw (ParserConfigurationException) e;
59 } else {
60 throw new RuntimeException(e);
61 }
62 }
63 factory.setNamespaceAware(namespaceAware);
64 factory.setValidating(validating);
65
66 final SAXParser parser = factory.newSAXParser();
67 this.reader = parser.getXMLReader();
68 }
69
70 public XmlSlurper(final XMLReader reader) {
71 this.reader = reader;
72 }
73
74 public XmlSlurper(final SAXParser parser) throws SAXException {
75 this(parser.getXMLReader());
76 }
77
78 /***
79 * Parse the content of the specified input source into a List
80 */
81 public XmlList parse(final InputSource input) throws IOException, SAXException {
82 this.reader.setContentHandler(this);
83 this.reader.parse(input);
84
85 return (XmlList)this.result.get(0);
86 }
87
88 /***
89 * Parses the content of the given file as XML turning it into a List
90 */
91 public XmlList parse(final File file) throws IOException, SAXException {
92 final InputSource input = new InputSource(new FileInputStream(file));
93
94 input.setSystemId("file://" + file.getAbsolutePath());
95
96 return parse(input);
97
98 }
99
100 /***
101 * Parse the content of the specified input stream into a List.
102 * Note that using this method will not provide the parser with any URI
103 * for which to find DTDs etc
104 */
105 public XmlList parse(final InputStream input) throws IOException, SAXException {
106 return parse(new InputSource(input));
107 }
108
109 /***
110 * Parse the content of the specified reader into a List.
111 * Note that using this method will not provide the parser with any URI
112 * for which to find DTDs etc
113 */
114 public XmlList parse(final Reader in) throws IOException, SAXException {
115 return parse(new InputSource(in));
116 }
117
118 /***
119 * Parse the content of the specified URI into a List
120 */
121 public XmlList parse(final String uri) throws IOException, SAXException {
122 return parse(new InputSource(uri));
123 }
124
125 /***
126 * A helper method to parse the given text as XML
127 *
128 * @param text
129 * @return
130 */
131 public XmlList parseText(final String text) throws IOException, SAXException {
132 return parse(new StringReader(text));
133 }
134
135
136
137
138
139
140
141
142 public void startDocument() throws SAXException {
143 this.result = null;
144 this.body = new LinkedList();
145 this.charBuffer.setLength(0);
146 }
147
148
149
150
151 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
152 addNonWhitespaceCdata();
153
154 final Map attributes = new HashMap();
155
156 for (int i = atts.getLength() - 1; i != -1; i--) {
157 if (atts.getURI(i).length() == 0) {
158 attributes.put(atts.getQName(i), atts.getValue(i));
159 } else {
160
161
162
163
164 attributes.put(atts.getLocalName(i), atts.getValue(i));
165 }
166
167 }
168
169 final List newBody = new LinkedList();
170
171 newBody.add(attributes);
172
173 newBody.add(this.body);
174
175 this.body = newBody;
176 }
177
178
179
180
181 public void characters(final char[] ch, final int start, final int length) throws SAXException {
182 this.charBuffer.append(ch, start, length);
183 }
184
185
186
187
188 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
189 addNonWhitespaceCdata();
190
191 final List children = this.body;
192
193 final Map attributes = (Map)this.body.remove(0);
194
195 this.body = (List)this.body.remove(0);
196
197 if (namespaceURI.length() == 0) {
198 this.body.add(new XmlList(qName, attributes, children, namespaceURI));
199 } else {
200 this.body.add(new XmlList(localName, attributes, children, namespaceURI));
201 }
202 }
203
204
205
206
207 public void endDocument() throws SAXException {
208 this.result = this.body;
209 this.body = null;
210 }
211
212
213
214
215 /***
216 *
217 */
218 private void addNonWhitespaceCdata() {
219 if (this.charBuffer.length() != 0) {
220
221
222
223
224
225 final String cdata = this.charBuffer.toString();
226
227 this.charBuffer.setLength(0);
228 if (cdata.trim().length() != 0) {
229 this.body.add(cdata);
230 }
231 }
232 }
233 }
234
235 class XmlList extends GroovyObjectSupport implements Writable, Buildable {
236 final String name;
237 final Map attributes;
238 final Object[] children;
239 final String namespaceURI;
240
241 public XmlList(final String name, final Map attributes, final List body, final String namespaceURI) {
242 super();
243
244 this.name = name;
245 this.attributes = attributes;
246 this.children = body.toArray();
247 this.namespaceURI = namespaceURI;
248 }
249
250 public Object getProperty(final String elementName) {
251 if (elementName.startsWith("@")) {
252 return this.attributes.get(elementName.substring(1));
253 } else {
254 final int indexOfFirst = getNextXmlElement(elementName, -1);
255
256 if (indexOfFirst == -1) {
257 return new ElementCollection() {
258 protected ElementCollection getResult(final String property) {
259 return this;
260 }
261
262 /***
263 *
264 * Used by the Invoker when it wants to iterate over this object
265 *
266 * @return
267 */
268 public ElementIterator iterator() {
269 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{-1}) {
270 {
271 findNextChild();
272 }
273
274 protected void findNextChild() {
275 this.nextParentElements[0] = -1;
276 }
277 };
278 }
279 };
280 }
281
282 if (getNextXmlElement(elementName, indexOfFirst) == -1) {
283 return this.children[indexOfFirst];
284 } else {
285 return new ElementCollection() {
286 protected ElementCollection getResult(final String property) {
287 return new ComplexElementCollection(new XmlList[]{XmlList.this},
288 new int[] {indexOfFirst},
289 new String[] {elementName},
290 property);
291 }
292
293 /***
294 *
295 * Used by the Invoker when it wants to iterate over this object
296 *
297 * @return
298 */
299 public ElementIterator iterator() {
300 return new ElementIterator(new XmlList[]{XmlList.this}, new int[]{indexOfFirst}) {
301 protected void findNextChild() {
302 this.nextParentElements[0] = XmlList.this.getNextXmlElement(elementName, this.nextParentElements[0]);
303 }
304 };
305 }
306 };
307 }
308 }
309 }
310
311 public Object getAt(final int index) {
312 if (index == 0) {
313 return this;
314 } else {
315 throw new ArrayIndexOutOfBoundsException(index);
316 }
317 }
318
319 public int size() {
320 return 1;
321 }
322
323 public Object invokeMethod(final String name, final Object args) {
324 if ("attributes".equals(name)) {
325 return this.attributes;
326 } else if ("name".equals(name)) {
327 return this.name;
328 } else if ("children".equals(name)) {
329 return this.children;
330 } else if ("contents".equals(name)) {
331 return new Buildable() {
332 public void build(GroovyObject builder) {
333 buildChildren(builder);
334 }
335 };
336 } else if ("text".equals(name)) {
337 return text();
338 } else if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
339 return getProperty((String)((Object[])args)[0]);
340 } else if ("depthFirst".equals(name)) {
341
342
343
344
345 return new GroovyObjectSupport() {
346 public Object invokeMethod(final String name, final Object args) {
347 if ("getAt".equals(name) && ((Object[])args)[0] instanceof String) {
348 return getProperty((String)((Object[])args)[0]);
349 } else {
350 return XmlList.this.invokeMethod(name, args);
351 }
352 }
353
354 public Object getProperty(final String property) {
355 if (property.startsWith("@")) {
356 return XmlList.this.getProperty(property);
357 } else {
358 final List result = new LinkedList();
359
360 depthFirstGetProperty(property, XmlList.this.children, result);
361
362 return result;
363 }
364 }
365
366 private void depthFirstGetProperty(final String property, final Object[] contents, final List result) {
367 for (int i = 0; i != contents.length; i++) {
368 final Object item = contents[i];
369
370 if (item instanceof XmlList) {
371 if (((XmlList)item).name.equals(property)) {
372 result.add(item);
373 }
374
375 depthFirstGetProperty(property, ((XmlList)item).children, result);
376 }
377 }
378 }
379 };
380 } else {
381 return getMetaClass().invokeMethod(this, name, args);
382 }
383 }
384
385
386
387
388 public Writer writeTo(Writer out) throws IOException {
389
390 for (int i = 0; i != this.children.length; i++) {
391 final Object child = this.children[i];
392
393 if (child instanceof String) {
394 out.write((String)child);
395 } else {
396 ((XmlList)child).writeTo(out);
397 }
398 }
399
400 return out;
401 }
402
403
404
405
406 public void build(final GroovyObject builder) {
407
408 final Closure rest = new Closure(null) {
409 public Object doCall(final Object o) {
410 buildChildren(builder);
411
412 return null;
413 }
414 };
415
416 builder.invokeMethod(this.name, new Object[]{this.attributes, rest});
417
418 }
419
420 public String toString() {
421 return text();
422 }
423
424 private String text() {
425 final StringBuffer buff = new StringBuffer();
426
427 for (int i = 0; i != this.children.length; i++) {
428 final Object child = this.children[i];
429
430 if (child instanceof String) {
431 buff.append(child);
432 } else {
433 buff.append(((XmlList)child).text());
434 }
435 }
436
437 return buff.toString();
438 }
439
440 private void buildChildren(final GroovyObject builder) {
441 for (int i = 0; i != this.children.length; i++) {
442 if (this.children[i] instanceof Buildable) {
443 ((Buildable)this.children[i]).build(builder);
444 } else {
445 builder.getProperty("mkp");
446 builder.invokeMethod("yield", new Object[]{this.children[i]});
447 }
448 }
449 }
450
451 protected int getNextXmlElement(final String name, final int lastFound) {
452 for (int i = lastFound + 1; i < this.children.length; i++) {
453 final Object item = this.children[i];
454
455 if (item instanceof XmlList && ((XmlList)item).name.equals(name)) {
456 return i;
457 }
458 }
459
460 return -1;
461 }
462 }
463
464 abstract class ElementIterator implements Iterator {
465 protected final XmlList[] parents;
466 protected final int[] nextParentElements;
467
468 protected ElementIterator(final XmlList[] parents, int[] nextParentElements) {
469 this.parents = new XmlList[parents.length];
470 System.arraycopy(parents, 0, this.parents, 0, parents.length);
471
472 this.nextParentElements = new int[nextParentElements.length];
473 System.arraycopy(nextParentElements, 0, this.nextParentElements, 0, nextParentElements.length);
474 }
475
476
477
478
479 public boolean hasNext() {
480 return this.nextParentElements[0] != -1;
481 }
482
483
484
485
486 public Object next() {
487 final Object result = this.parents[0].children[this.nextParentElements[0]];
488
489 findNextChild();
490
491 return result;
492 }
493
494
495
496
497 public void remove() {
498 throw new UnsupportedOperationException();
499 }
500
501 protected abstract void findNextChild();
502 }
503
504 abstract class ElementCollection extends GroovyObjectSupport {
505 private int count = -1;
506
507 public abstract ElementIterator iterator();
508
509
510
511
512 public Object getProperty(final String property) {
513 final ElementCollection result = getResult(property);
514 final Iterator iterator = result.iterator();
515
516 if (iterator.hasNext()) {
517
518
519
520 final Object first = iterator.next();
521
522 if (!iterator.hasNext()) {
523 return first;
524 }
525 }
526
527 return result;
528 }
529
530 protected abstract ElementCollection getResult(String property);
531
532 public synchronized Object getAt(int index) {
533 if (index >= 0) {
534 final Iterator iter = iterator();
535
536 while (iter.hasNext()) {
537 if (index-- == 0) {
538 return iter.next();
539 } else {
540 iter.next();
541 }
542 }
543 }
544
545 throw new ArrayIndexOutOfBoundsException(index);
546 }
547
548 public synchronized int size() {
549 if (this.count == -1) {
550 final Iterator iter = iterator();
551
552 this.count = 0;
553
554 while (iter.hasNext()) {
555 this.count++;
556 iter.next();
557 }
558 }
559 return this.count;
560 }
561 }
562
563 class ComplexElementCollection extends ElementCollection {
564 private final XmlList[] parents;
565 private final int[] nextParentElements;
566 private final String[] parentElementNames;
567
568 public ComplexElementCollection(final XmlList[] parents,
569 final int[] nextParentElements,
570 final String[] parentElementNames,
571 final String childElementName)
572 {
573 this.parents = new XmlList[parents.length + 1];
574 this.parents[0] = (XmlList)parents[0].children[nextParentElements[0]];
575 System.arraycopy(parents, 0, this.parents, 1, parents.length);
576
577 this.nextParentElements = new int[nextParentElements.length + 1];
578 this.nextParentElements[0] = -1;
579 System.arraycopy(nextParentElements, 0, this.nextParentElements, 1, nextParentElements.length);
580
581 this.parentElementNames = new String[parentElementNames.length + 1];
582 this.parentElementNames[0] = childElementName;
583 System.arraycopy(parentElementNames, 0, this.parentElementNames, 1, parentElementNames.length);
584
585
586
587
588
589 final ElementIterator iter = this.iterator();
590
591 iter.findNextChild();
592
593 this.nextParentElements[0] = iter.nextParentElements[0];
594 }
595
596 protected ElementCollection getResult(final String property) {
597 return new ComplexElementCollection(this.parents,
598 this.nextParentElements,
599 this.parentElementNames,
600 property);
601 }
602
603 /***
604 *
605 * Used by the Invoker when it wants to iterate over this object
606 *
607 * @return
608 */
609 public ElementIterator iterator() {
610 return new ElementIterator(this.parents, this.nextParentElements) {
611 protected void findNextChild() {
612 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], this.nextParentElements[0]);
613
614 while (this.nextParentElements[0] == -1) {
615 this.parents[0] = findNextParent(1);
616
617 if (this.parents[0] == null) {
618 return;
619 } else {
620 this.nextParentElements[0] = this.parents[0].getNextXmlElement(ComplexElementCollection.this.parentElementNames[0], -1);
621 }
622 }
623 }
624
625 private XmlList findNextParent(final int i) {
626 if (i == this.nextParentElements.length) return null;
627
628 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], this.nextParentElements[i]);
629
630 while (this.nextParentElements[i] == -1) {
631 this.parents[i] = findNextParent(i + 1);
632
633 if (this.parents[i] == null) {
634 return null;
635 } else {
636 this.nextParentElements[i] = this.parents[i].getNextXmlElement(ComplexElementCollection.this.parentElementNames[i], -1);
637 }
638 }
639
640 return (XmlList)this.parents[i].children[this.nextParentElements[i]];
641 }
642 };
643 }
644 }