Frames | No Frames |
1: /* DoParse.java -- 2: Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: package gnu.xml.util; 39: 40: import java.io.IOException; 41: 42: import org.xml.sax.ErrorHandler; 43: import org.xml.sax.InputSource; 44: import org.xml.sax.SAXException; 45: import org.xml.sax.SAXParseException; 46: import org.xml.sax.XMLReader; 47: import org.xml.sax.helpers.XMLReaderFactory; 48: 49: import gnu.xml.pipeline.EventConsumer; 50: import gnu.xml.pipeline.EventFilter; 51: import gnu.xml.pipeline.NSFilter; 52: import gnu.xml.pipeline.PipelineFactory; 53: import gnu.xml.pipeline.TeeConsumer; 54: import gnu.xml.pipeline.ValidationConsumer; 55: import gnu.xml.pipeline.WellFormednessFilter; 56: 57: /** 58: * This class provides a driver which may be invoked from the command line 59: * to process a document using a SAX2 parser and a specified XML processing 60: * pipeline. 61: * This facilitates some common types of command line tools, such as parsing an 62: * XML document in order test it for well formedness or validity. 63: * 64: * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which 65: * supports both of the standardized extension handlers (for declaration 66: * and lexical events). That parser will be used to produce events. 67: * 68: * <p>The first parameter to the command gives the name of the document that 69: * will be given to that processor. If it is a file name, it is converted 70: * to a URL first. 71: * 72: * <p>The second parameter describes a simple processing pipeline, and will 73: * be used as input to {@link gnu.xml.pipeline.PipelineFactory} 74: * methods which identify the processing to be done. Examples of such a 75: * pipeline include <pre> 76: * 77: * nsfix | validate <em>to validate the input document </em> 78: * nsfix | write ( stdout ) <em>to echo the file as XML text</em> 79: * dom | nsfix | write ( stdout ) <em>parse into DOM, print the result</em> 80: * </pre> 81: * 82: * <p> Relatively complex pipelines can be described on the command line, but 83: * not all interesting ones will require as little configuration as can be done 84: * in that way. Put filters like "nsfix", perhaps followed by "validate", 85: * at the front of the pipeline so they can be optimized out if a parser 86: * supports those modes natively. 87: * 88: * <p> If the parsing is aborted for any reason, the JVM will exit with a 89: * failure code. If a validating parse was done then both validation and 90: * well formedness errors will cause a failure. A non-validating parse 91: * will report failure on well formedness errors. 92: * 93: * @see gnu.xml.pipeline.PipelineFactory 94: * 95: * @author David Brownell 96: */ 97: final public class DoParse 98: { 99: private DoParse () { /* no instances allowed */ } 100: 101: // first reported nonrecoverable error 102: private static SAXParseException fatal; 103: 104: // error categories 105: private static int errorCount; 106: private static int fatalCount; 107: 108: /** 109: * Command line invoker for this class; pass a filename or URL 110: * as the first argument, and a pipeline description as the second. 111: * Make sure to use filters to condition the input to stages that 112: * require it; an <em>nsfix</em> filter will be a common requirement, 113: * to restore syntax that SAX2 parsers delete by default. Some 114: * conditioning filters may be eliminated by setting parser options. 115: * (For example, "nsfix" can set the "namespace-prefixes" feature to 116: * a non-default value of "true". In the same way, "validate" can set 117: * the "validation" feature to "true".) 118: */ 119: public static void main (String argv []) 120: throws IOException 121: { 122: int exitStatus = 1; 123: 124: if (argv.length != 2) { 125: System.err.println ("Usage: DoParse [filename|URL] pipeline-spec"); 126: System.err.println ("Example pipeline specs:"); 127: System.err.println (" 'nsfix | validate'"); 128: System.err.println ( 129: " ... restore namespace syntax, validate"); 130: System.err.println (" 'nsfix | write ( stdout )'"); 131: System.err.println ( 132: " ... restore namespace syntax, write to stdout as XML" 133: ); 134: System.exit (1); 135: } 136: 137: try { 138: // 139: // Get input source for specified document (or try ;-) 140: // 141: argv [0] = Resolver.getURL (argv [0]); 142: InputSource input = new InputSource (argv [0]); 143: 144: // 145: // Get the producer, using the system default parser (which 146: // can be overridden for this particular invocation). 147: // 148: // And the pipeline, using commandline options. 149: // 150: XMLReader producer; 151: EventConsumer consumer; 152: 153: producer = XMLReaderFactory.createXMLReader (); 154: 155: // 156: // XXX pipeline factory now has a pre-tokenized input 157: // method, use it ... that way at least some params 158: // can be written using quotes (have spaces, ...) 159: // 160: consumer = PipelineFactory.createPipeline (argv [1]); 161: 162: // 163: // XXX want commandline option for tweaking error handler. 164: // Want to be able to present warnings. 165: // 166: producer.setErrorHandler (new MyErrorHandler ()); 167: 168: // XXX need facility enabling resolving to local DTDs 169: 170: // 171: // Parse. The pipeline may get optimized a bit, so we 172: // can't always fail cleanly for validation without taking 173: // a look at the filter stages. 174: // 175: EventFilter.bind (producer, consumer); 176: producer.parse (input); 177: 178: try { 179: if (producer.getFeature ( 180: "http://org.xml/sax/features/validation")) 181: exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0; 182: else if (fatalCount == 0) 183: exitStatus = 0; 184: } catch (SAXException e) { 185: if (hasValidator (consumer)) 186: exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0; 187: else if (fatalCount == 0) 188: exitStatus = 0; 189: } 190: 191: } catch (java.net.MalformedURLException e) { 192: System.err.println ("** Malformed URL: " + e.getMessage ()); 193: System.err.println ("Is '" + argv [0] + "' a non-existent file?"); 194: e.printStackTrace (); 195: // e.g. FNF 196: 197: } catch (SAXParseException e) { 198: if (e != fatal) { 199: System.err.print (printParseException ("Parsing Aborted", e)); 200: e.printStackTrace (); 201: if (e.getException () != null) { 202: System.err.println ("++ Wrapped exception:"); 203: e.getException ().printStackTrace (); 204: } 205: } 206: 207: } catch (SAXException e) { 208: Exception x = e; 209: if (e.getException () != null) 210: x = e.getException (); 211: x.printStackTrace (); 212: 213: } catch (Throwable t) { 214: t.printStackTrace (); 215: } 216: 217: System.exit (exitStatus); 218: } 219: 220: // returns true if saw a validator (before end or unrecognized node) 221: // false otherwise 222: private static boolean hasValidator (EventConsumer e) 223: { 224: if (e == null) 225: return false; 226: if (e instanceof ValidationConsumer) 227: return true; 228: if (e instanceof TeeConsumer) { 229: TeeConsumer t = (TeeConsumer) e; 230: return hasValidator (t.getFirst ()) 231: || hasValidator (t.getRest ()); 232: } 233: if (e instanceof WellFormednessFilter 234: || e instanceof NSFilter 235: ) 236: return hasValidator (((EventFilter)e).getNext ()); 237: 238: // else ... gee, we can't know. Assume not. 239: 240: return false; 241: } 242: 243: static class MyErrorHandler implements ErrorHandler 244: { 245: // dump validation errors, but continue 246: public void error (SAXParseException e) 247: throws SAXParseException 248: { 249: errorCount++; 250: System.err.print (printParseException ("Error", e)); 251: } 252: 253: public void warning (SAXParseException e) 254: throws SAXParseException 255: { 256: // System.err.print (printParseException ("Warning", e)); 257: } 258: 259: // try to continue fatal errors, in case a parser reports more 260: public void fatalError (SAXParseException e) 261: throws SAXParseException 262: { 263: fatalCount++; 264: if (fatal == null) 265: fatal = e; 266: System.err.print (printParseException ("Nonrecoverable Error", e)); 267: } 268: } 269: 270: static private String printParseException ( 271: String label, 272: SAXParseException e 273: ) { 274: StringBuffer buf = new StringBuffer (); 275: int temp; 276: 277: buf.append ("** "); 278: buf.append (label); 279: buf.append (": "); 280: buf.append (e.getMessage ()); 281: buf.append ('\n'); 282: if (e.getSystemId () != null) { 283: buf.append (" URI: "); 284: buf.append (e.getSystemId ()); 285: buf.append ('\n'); 286: } 287: if ((temp = e.getLineNumber ()) != -1) { 288: buf.append (" line: "); 289: buf.append (temp); 290: buf.append ('\n'); 291: } 292: if ((temp = e.getColumnNumber ()) != -1) { 293: buf.append (" char: "); 294: buf.append (temp); 295: buf.append ('\n'); 296: } 297: 298: return buf.toString (); 299: } 300: }