1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 package groovy.util;
47
48 import groovy.xml.QName;
49 import groovy.xml.FactorySupport;
50
51 import java.io.File;
52 import java.io.FileInputStream;
53 import java.io.IOException;
54 import java.io.InputStream;
55 import java.io.Reader;
56 import java.io.StringReader;
57 import java.util.ArrayList;
58 import java.util.HashMap;
59 import java.util.List;
60 import java.util.Map;
61
62 import javax.xml.parsers.ParserConfigurationException;
63 import javax.xml.parsers.SAXParser;
64 import javax.xml.parsers.SAXParserFactory;
65
66 import org.xml.sax.*;
67
68 /***
69 * A helper class for parsing XML into a tree of Node instances for
70 * a simple way of processing XML. This parser does not preserve the
71 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
72 * This parser ignores comments and processing instructions and converts the
73 * XML into a Node for each element in the XML with attributes
74 * and child Nodes and Strings. This simple model is sufficient for
75 * most simple use cases of processing XML.
76 *
77 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
78 * @version $Revision: 4132 $
79 */
80 public class XmlParser implements ContentHandler {
81
82 private StringBuffer bodyText = new StringBuffer();
83 private List stack = new ArrayList();
84 private Locator locator;
85 private XMLReader reader;
86 private Node parent;
87 private boolean trimWhitespace = true;
88
89 public XmlParser() throws ParserConfigurationException, SAXException {
90 this(false, true);
91 }
92
93 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
94 SAXParserFactory factory = FactorySupport.createSaxParserFactory();
95 factory.setNamespaceAware(namespaceAware);
96 factory.setValidating(validating);
97 reader = factory.newSAXParser().getXMLReader();
98 }
99
100 public XmlParser(XMLReader reader) {
101 this.reader = reader;
102 }
103
104 public XmlParser(SAXParser parser) throws SAXException {
105 reader = parser.getXMLReader();
106 }
107
108
109 /***
110 * Parses the content of the given file as XML turning it into a tree
111 * of Nodes
112 */
113 public Node parse(File file) throws IOException, SAXException {
114 InputSource input = new InputSource(new FileInputStream(file));
115 input.setSystemId("file://" + file.getAbsolutePath());
116 getXMLReader().parse(input);
117 return parent;
118
119 }
120
121 /***
122 * Parse the content of the specified input source into a tree of Nodes.
123 */
124 public Node parse(InputSource input) throws IOException, SAXException {
125 getXMLReader().parse(input);
126 return parent;
127 }
128
129 /***
130 * Parse the content of the specified input stream into a tree of Nodes.
131 * Note that using this method will not provide the parser with any URI
132 * for which to find DTDs etc
133 */
134 public Node parse(InputStream input) throws IOException, SAXException {
135 InputSource is = new InputSource(input);
136 getXMLReader().parse(is);
137 return parent;
138 }
139
140 /***
141 * Parse the content of the specified reader into a tree of Nodes.
142 * Note that using this method will not provide the parser with any URI
143 * for which to find DTDs etc
144 */
145 public Node parse(Reader in) throws IOException, SAXException {
146 InputSource is = new InputSource(in);
147 getXMLReader().parse(is);
148 return parent;
149 }
150
151 /***
152 * Parse the content of the specified URI into a tree of Nodes
153 */
154 public Node parse(String uri) throws IOException, SAXException {
155 InputSource is = new InputSource(uri);
156 getXMLReader().parse(is);
157 return parent;
158 }
159
160 /***
161 * A helper method to parse the given text as XML
162 *
163 * @param text
164 */
165 public Node parseText(String text) throws IOException, SAXException {
166 return parse(new StringReader(text));
167 }
168
169
170
171
172
173
174 public DTDHandler getDTDHandler() {
175 return this.reader.getDTDHandler();
176 }
177
178
179
180
181 public EntityResolver getEntityResolver() {
182 return this.reader.getEntityResolver();
183 }
184
185
186
187
188 public ErrorHandler getErrorHandler() {
189 return this.reader.getErrorHandler();
190 }
191
192
193
194
195 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
196 return this.reader.getFeature(uri);
197 }
198
199
200
201
202 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
203 return this.reader.getProperty(uri);
204 }
205
206
207
208
209 public void setDTDHandler(final DTDHandler dtdHandler) {
210 this.reader.setDTDHandler(dtdHandler);
211 }
212
213
214
215
216 public void setEntityResolver(final EntityResolver entityResolver) {
217 this.reader.setEntityResolver(entityResolver);
218 }
219
220
221
222
223 public void setErrorHandler(final ErrorHandler errorHandler) {
224 this.reader.setErrorHandler(errorHandler);
225 }
226
227
228
229
230 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
231 this.reader.setFeature(uri, value);
232 }
233
234
235
236
237 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
238 this.reader.setProperty(uri, value);
239 }
240
241
242
243 public void startDocument() throws SAXException {
244 parent = null;
245 }
246
247 public void endDocument() throws SAXException {
248 stack.clear();
249 }
250
251 public void startElement(String namespaceURI, String localName, String qName, Attributes list)
252 throws SAXException {
253 addTextToNode();
254
255 Object name = getElementName(namespaceURI, localName, qName);
256
257 int size = list.getLength();
258 Map attributes = new HashMap(size);
259 for (int i = 0; i < size; i++) {
260 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
261 String value = list.getValue(i);
262 attributes.put(attributeName, value);
263 }
264 parent = new Node(parent, name, attributes, new ArrayList());
265 stack.add(parent);
266 }
267
268 public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
269 addTextToNode();
270
271 if (!stack.isEmpty()) {
272 stack.remove(stack.size() - 1);
273 if (!stack.isEmpty()) {
274 parent = (Node) stack.get(stack.size() - 1);
275 }
276 }
277 }
278
279 public void characters(char buffer[], int start, int length) throws SAXException {
280 bodyText.append(buffer, start, length);
281 }
282
283 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
284 }
285
286 public void endPrefixMapping(String prefix) throws SAXException {
287 }
288
289 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
290 }
291
292 public void processingInstruction(String target, String data) throws SAXException {
293 }
294
295 public Locator getDocumentLocator() {
296 return locator;
297 }
298
299 public void setDocumentLocator(Locator locator) {
300 this.locator = locator;
301 }
302
303 public void skippedEntity(String name) throws SAXException {
304 }
305
306
307
308 protected XMLReader getXMLReader() {
309 reader.setContentHandler(this);
310 return reader;
311 }
312
313 protected void addTextToNode() {
314 String text = bodyText.toString();
315 if (trimWhitespace) {
316 text = text.trim();
317 }
318 if (text.length() > 0) {
319 parent.children().add(text);
320 }
321 bodyText = new StringBuffer();
322 }
323
324 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
325 String name = localName;
326 if ((name == null) || (name.length() < 1)) {
327 name = qName;
328 }
329 if (namespaceURI == null || namespaceURI.length() <= 0) {
330 return name;
331 }
332 else {
333 return new QName(namespaceURI, name, qName);
334 }
335 }
336 }