1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package groovy.util;
19
20 import groovy.util.slurpersupport.GPathResult;
21 import groovy.util.slurpersupport.Node;
22 import groovy.util.slurpersupport.NodeChild;
23 import groovy.xml.FactorySupport;
24
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.HashMap;
33 import java.util.Hashtable;
34 import java.util.Map;
35 import java.util.Stack;
36
37 import javax.xml.parsers.ParserConfigurationException;
38 import javax.xml.parsers.SAXParser;
39 import javax.xml.parsers.SAXParserFactory;
40
41 import org.xml.sax.Attributes;
42 import org.xml.sax.DTDHandler;
43 import org.xml.sax.EntityResolver;
44 import org.xml.sax.ErrorHandler;
45 import org.xml.sax.InputSource;
46 import org.xml.sax.SAXException;
47 import org.xml.sax.SAXNotRecognizedException;
48 import org.xml.sax.SAXNotSupportedException;
49 import org.xml.sax.XMLReader;
50 import org.xml.sax.helpers.DefaultHandler;
51
/**
 * SAX content handler that builds a tree of Node objects from XML and
 * exposes the result as a GPathResult.
 *
 * @author John Wilson
 */
56
57 public class XmlSlurper extends DefaultHandler {
58 private final XMLReader reader;
59 private Node currentNode = null;
60 private final Stack stack = new Stack();
61 private final StringBuffer charBuffer = new StringBuffer();
62 private final Map namespaceTagHints = new Hashtable();
63 private boolean keepWhitespace = false;
64
65 public XmlSlurper() throws ParserConfigurationException, SAXException {
66 this(false, true);
67 }
68
69 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
70 SAXParserFactory factory = FactorySupport.createSaxParserFactory();
71 factory.setNamespaceAware(namespaceAware);
72 factory.setValidating(validating);
73 this.reader = factory.newSAXParser().getXMLReader();
74 }
75
76 public XmlSlurper(final XMLReader reader) {
77 this.reader = reader;
78 }
79
80 public XmlSlurper(final SAXParser parser) throws SAXException {
81 this(parser.getXMLReader());
82 }
83
84 /***
85 * @param keepWhitespace
86 *
87 * If true then whitespace before elements is kept.
88 * The deafult is to discard the whitespace.
89 */
90 public void setKeepWhitespace(boolean keepWhitespace) {
91 this.keepWhitespace = keepWhitespace;
92 }
93
94 /***
95 * @return The GPathResult instance created by consuming a stream of SAX events
96 * Note if one of the parse methods has been called then this returns null
97 * Note if this is called more than once all calls after the first will return null
98 *
99 */
100 public GPathResult getDocument() {
101 try {
102 return new NodeChild(this.currentNode, null, this.namespaceTagHints);
103 } finally {
104 this.currentNode = null;
105 }
106 }
107
108 /***
109 * Parse the content of the specified input source into a GPathResult object
110 *
111 * @param input
112 * @return An object which supports GPath expressions
113 * @throws IOException
114 * @throws SAXException
115 */
116 public GPathResult parse(final InputSource input) throws IOException, SAXException {
117 this.reader.setContentHandler(this);
118 this.reader.parse(input);
119
120 return getDocument();
121
122 }
123
124 /***
125 * Parses the content of the given file as XML turning it into a GPathResult object
126 *
127 * @param file
128 * @return An object which supports GPath expressions
129 * @throws IOException
130 * @throws SAXException
131 */
132 public GPathResult parse(final File file) throws IOException, SAXException {
133 final InputSource input = new InputSource(new FileInputStream(file));
134
135 input.setSystemId("file://" + file.getAbsolutePath());
136
137 return parse(input);
138
139 }
140
141 /***
142 * Parse the content of the specified input stream into an GPathResult Object.
143 * Note that using this method will not provide the parser with any URI
144 * for which to find DTDs etc
145 *
146 * @param input
147 * @return An object which supports GPath expressions
148 * @throws IOException
149 * @throws SAXException
150 */
151 public GPathResult parse(final InputStream input) throws IOException, SAXException {
152 return parse(new InputSource(input));
153 }
154
155 /***
156 * Parse the content of the specified reader into a GPathResult Object.
157 * Note that using this method will not provide the parser with any URI
158 * for which to find DTDs etc
159 *
160 * @param in
161 * @return An object which supports GPath expressions
162 * @throws IOException
163 * @throws SAXException
164 */
165 public GPathResult parse(final Reader in) throws IOException, SAXException {
166 return parse(new InputSource(in));
167 }
168
169 /***
170 * Parse the content of the specified URI into a GPathResult Object
171 *
172 * @param uri
173 * @return An object which supports GPath expressions
174 * @throws IOException
175 * @throws SAXException
176 */
177 public GPathResult parse(final String uri) throws IOException, SAXException {
178 return parse(new InputSource(uri));
179 }
180
181 /***
182 * A helper method to parse the given text as XML
183 *
184 * @param text
185 * @return An object which supports GPath expressions
186 */
187 public GPathResult parseText(final String text) throws IOException, SAXException {
188 return parse(new StringReader(text));
189 }
190
191
192
193
194
195
196
197 public DTDHandler getDTDHandler() {
198 return this.reader.getDTDHandler();
199 }
200
201
202
203
204 public EntityResolver getEntityResolver() {
205 return this.reader.getEntityResolver();
206 }
207
208
209
210
211 public ErrorHandler getErrorHandler() {
212 return this.reader.getErrorHandler();
213 }
214
215
216
217
218 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
219 return this.reader.getFeature(uri);
220 }
221
222
223
224
225 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
226 return this.reader.getProperty(uri);
227 }
228
229
230
231
232 public void setDTDHandler(final DTDHandler dtdHandler) {
233 this.reader.setDTDHandler(dtdHandler);
234 }
235
236
237
238
239 public void setEntityResolver(final EntityResolver entityResolver) {
240 this.reader.setEntityResolver(entityResolver);
241 }
242
243 /***
244 * Resolves entities against using the suppied URL as the base for relative URLs
245 *
246 * @param base
247 * The URL used to resolve relative URLs
248 */
249 public void setEntityBaseUrl(final URL base) {
250 this.reader.setEntityResolver(new EntityResolver() {
251 public InputSource resolveEntity(final String publicId, final String systemId) throws IOException {
252 return new InputSource(new URL(base, systemId).openStream());
253 }
254 });
255 }
256
257
258
259
260 public void setErrorHandler(final ErrorHandler errorHandler) {
261 this.reader.setErrorHandler(errorHandler);
262 }
263
264
265
266
267 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
268 this.reader.setFeature(uri, value);
269 }
270
271
272
273
274 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
275 this.reader.setProperty(uri, value);
276 }
277
278
279
280
281
282
283
284
285 public void startDocument() throws SAXException {
286 this.currentNode = null;
287 this.charBuffer.setLength(0);
288 }
289
290
291
292
293 public void startPrefixMapping(final String tag, final String uri) throws SAXException {
294 this.namespaceTagHints.put(tag, uri);
295 }
296
297
298
299
300 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
301 addCdata();
302
303 final Map attributes = new HashMap();
304 final Map attributeNamespaces = new HashMap();
305
306 for (int i = atts.getLength() - 1; i != -1; i--) {
307 if (atts.getURI(i).length() == 0) {
308 attributes.put(atts.getQName(i), atts.getValue(i));
309 } else {
310 attributes.put(atts.getLocalName(i), atts.getValue(i));
311 attributeNamespaces.put(atts.getLocalName(i), atts.getURI(i));
312 }
313
314 }
315
316 final Node newElement;
317
318 if (namespaceURI.length() == 0){
319 newElement = new Node(this.currentNode, qName, attributes, attributeNamespaces, namespaceURI);
320 } else {
321 newElement = new Node(this.currentNode, localName, attributes, attributeNamespaces, namespaceURI);
322 }
323
324 if (this.currentNode != null) {
325 this.currentNode.addChild(newElement);
326 }
327
328 this.stack.push(this.currentNode);
329 this.currentNode = newElement;
330 }
331
332
333
334
335 public void characters(final char[] ch, final int start, final int length) throws SAXException {
336 this.charBuffer.append(ch, start, length);
337 }
338
339
340
341
342 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
343 addCdata();
344
345 final Object oldCurrentNode = this.stack.pop();
346
347 if (oldCurrentNode != null) {
348 this.currentNode = (Node)oldCurrentNode;
349 }
350 }
351
352
353
354
355 public void endDocument() throws SAXException {
356 }
357
358
359
360
361 /***
362 *
363 */
364 private void addCdata() {
365 if (this.charBuffer.length() != 0) {
366
367
368
369
370
371
372 final String cdata = this.charBuffer.toString();
373
374 this.charBuffer.setLength(0);
375 if (this.keepWhitespace || cdata.trim().length() != 0) {
376 this.currentNode.addChild(cdata);
377 }
378 }
379 }
380 }