View Javadoc

1   /*
2    $Id: XmlParser.java 4132 2006-10-18 08:24:58Z paulk $
3   
4    Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
5   
6    Redistribution and use of this software and associated documentation
7    ("Software"), with or without modification, are permitted provided
8    that the following conditions are met:
9   
10   1. Redistributions of source code must retain copyright
11      statements and notices.  Redistributions must also contain a
12      copy of this document.
13  
14   2. Redistributions in binary form must reproduce the
15      above copyright notice, this list of conditions and the
16      following disclaimer in the documentation and/or other
17      materials provided with the distribution.
18  
19   3. The name "groovy" must not be used to endorse or promote
20      products derived from this Software without prior written
21      permission of The Codehaus.  For written permission,
22      please contact info@codehaus.org.
23  
24   4. Products derived from this Software may not be called "groovy"
25      nor may "groovy" appear in their names without prior written
26      permission of The Codehaus. "groovy" is a registered
27      trademark of The Codehaus.
28  
29   5. Due credit should be given to The Codehaus -
30      http://groovy.codehaus.org/
31  
32   THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
33   ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
34   NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
35   FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
36   THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43   OF THE POSSIBILITY OF SUCH DAMAGE.
44  
45   */
46  package groovy.util;
47  
48  import groovy.xml.QName;
49  import groovy.xml.FactorySupport;
50  
51  import java.io.File;
52  import java.io.FileInputStream;
53  import java.io.IOException;
54  import java.io.InputStream;
55  import java.io.Reader;
56  import java.io.StringReader;
57  import java.util.ArrayList;
58  import java.util.HashMap;
59  import java.util.List;
60  import java.util.Map;
61  
62  import javax.xml.parsers.ParserConfigurationException;
63  import javax.xml.parsers.SAXParser;
64  import javax.xml.parsers.SAXParserFactory;
65  
66  import org.xml.sax.*;
67  
68  /***
69   * A helper class for parsing XML into a tree of Node instances for 
70   * a simple way of processing XML. This parser does not preserve the
71   * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
72   * This parser ignores comments and processing instructions and converts the
73   * XML into a Node for each element in the XML with attributes
74   * and child Nodes and Strings. This simple model is sufficient for
75   * most simple use cases of processing XML.
76   * 
77   * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
78   * @version $Revision: 4132 $
79   */
80  public class XmlParser implements ContentHandler {
81  
82      private StringBuffer bodyText = new StringBuffer();
83      private List stack = new ArrayList();
84      private Locator locator;
85      private XMLReader reader;
86      private Node parent;
87      private boolean trimWhitespace = true;
88  
89      public XmlParser() throws ParserConfigurationException, SAXException {
90          this(false, true);
91      }
92  
93      public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
94          SAXParserFactory factory = FactorySupport.createSaxParserFactory();
95          factory.setNamespaceAware(namespaceAware);
96          factory.setValidating(validating);
97          reader = factory.newSAXParser().getXMLReader();
98      }
99  
100     public XmlParser(XMLReader reader) {
101         this.reader = reader;
102     }
103 
104     public XmlParser(SAXParser parser) throws SAXException {
105         reader = parser.getXMLReader();
106     }
107 
108 
109     /***
110      * Parses the content of the given file as XML turning it into a tree
111      * of Nodes
112      */
113     public Node parse(File file) throws IOException, SAXException {
114         InputSource input = new InputSource(new FileInputStream(file));
115         input.setSystemId("file://" + file.getAbsolutePath());
116         getXMLReader().parse(input);
117         return parent;
118 
119     }
120 
121     /***
122      * Parse the content of the specified input source into a tree of Nodes.
123      */
124     public Node parse(InputSource input) throws IOException, SAXException {
125         getXMLReader().parse(input);
126         return parent;
127     }
128 
129     /***
130      * Parse the content of the specified input stream into a tree of Nodes.
131      * Note that using this method will not provide the parser with any URI
132      * for which to find DTDs etc
133      */
134     public Node parse(InputStream input) throws IOException, SAXException {
135         InputSource is = new InputSource(input);
136         getXMLReader().parse(is);
137         return parent;
138     }
139 
140     /***
141      * Parse the content of the specified reader into a tree of Nodes.
142      * Note that using this method will not provide the parser with any URI
143      * for which to find DTDs etc
144      */
145     public Node parse(Reader in) throws IOException, SAXException {
146         InputSource is = new InputSource(in);
147         getXMLReader().parse(is);
148         return parent;
149     }
150 
151     /***
152      * Parse the content of the specified URI into a tree of Nodes
153      */
154     public Node parse(String uri) throws IOException, SAXException {
155         InputSource is = new InputSource(uri);
156         getXMLReader().parse(is);
157         return parent;
158     }
159 
160     /***
161      * A helper method to parse the given text as XML
162      * 
163      * @param text
164      */
165     public Node parseText(String text) throws IOException, SAXException {
166         return parse(new StringReader(text));
167     }
168     // Delegated XMLReader methods
169     //------------------------------------------------------------------------
170 
171     /* (non-Javadoc)
172      * @see org.xml.sax.XMLReader#getDTDHandler()
173      */
174     public DTDHandler getDTDHandler() {
175         return this.reader.getDTDHandler();
176     }
177 
178     /* (non-Javadoc)
179      * @see org.xml.sax.XMLReader#getEntityResolver()
180      */
181     public EntityResolver getEntityResolver() {
182         return this.reader.getEntityResolver();
183     }
184 
185     /* (non-Javadoc)
186      * @see org.xml.sax.XMLReader#getErrorHandler()
187      */
188     public ErrorHandler getErrorHandler() {
189         return this.reader.getErrorHandler();
190     }
191 
192     /* (non-Javadoc)
193      * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
194      */
195     public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
196         return this.reader.getFeature(uri);
197     }
198 
199     /* (non-Javadoc)
200      * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
201      */
202     public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
203         return this.reader.getProperty(uri);
204     }
205 
206     /* (non-Javadoc)
207      * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
208      */
209     public void setDTDHandler(final DTDHandler dtdHandler) {
210         this.reader.setDTDHandler(dtdHandler);
211     }
212 
213     /* (non-Javadoc)
214      * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
215      */
216     public void setEntityResolver(final EntityResolver entityResolver) {
217         this.reader.setEntityResolver(entityResolver);
218     }
219 
220     /* (non-Javadoc)
221      * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
222      */
223     public void setErrorHandler(final ErrorHandler errorHandler) {
224         this.reader.setErrorHandler(errorHandler);
225     }
226 
227     /* (non-Javadoc)
228      * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
229      */
230     public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
231         this.reader.setFeature(uri, value);
232     }
233 
234     /* (non-Javadoc)
235      * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
236      */
237     public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
238          this.reader.setProperty(uri, value);
239     }
240 
241     // ContentHandler interface
242     //-------------------------------------------------------------------------                    
243     public void startDocument() throws SAXException {
244         parent = null;
245     }
246 
247     public void endDocument() throws SAXException {
248         stack.clear();
249     }
250 
251     public void startElement(String namespaceURI, String localName, String qName, Attributes list)
252         throws SAXException {
253         addTextToNode();
254 
255         Object name = getElementName(namespaceURI, localName, qName);
256 
257         int size = list.getLength();
258         Map attributes = new HashMap(size);
259         for (int i = 0; i < size; i++) {
260             Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
261             String value = list.getValue(i);
262             attributes.put(attributeName, value);
263         }
264         parent = new Node(parent, name, attributes, new ArrayList());
265         stack.add(parent);
266     }
267 
268     public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
269         addTextToNode();
270 
271         if (!stack.isEmpty()) {
272             stack.remove(stack.size() - 1);
273             if (!stack.isEmpty()) {
274                 parent = (Node) stack.get(stack.size() - 1);
275             }
276         }
277     }
278 
279     public void characters(char buffer[], int start, int length) throws SAXException {
280         bodyText.append(buffer, start, length);
281     }
282 
283     public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
284     }
285 
286     public void endPrefixMapping(String prefix) throws SAXException {
287     }
288 
289     public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
290     }
291 
292     public void processingInstruction(String target, String data) throws SAXException {
293     }
294 
295     public Locator getDocumentLocator() {
296         return locator;
297     }
298 
299     public void setDocumentLocator(Locator locator) {
300         this.locator = locator;
301     }
302 
303     public void skippedEntity(String name) throws SAXException {
304     }
305 
306     // Implementation methods
307     //-------------------------------------------------------------------------           
308     protected XMLReader getXMLReader() {
309         reader.setContentHandler(this);
310         return reader;
311     }
312 
313     protected void addTextToNode() {
314         String text = bodyText.toString();
315         if (trimWhitespace) {
316             text = text.trim();
317         }
318         if (text.length() > 0) {
319             parent.children().add(text);
320         }
321         bodyText = new StringBuffer();
322     }
323 
324     protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
325         String name = localName;
326         if ((name == null) || (name.length() < 1)) {
327             name = qName;
328         }
329         if (namespaceURI == null || namespaceURI.length() <= 0) {
330             return name;
331         }
332         else {
333             return new QName(namespaceURI, name, qName);
334         }
335     }
336 }