6 * Adapted from load-grammar-dom.cxx
7 * Written by Boris Kolpackov <boris@codesynthesis.com>
8 * Assigned, by the author, to the public domain
10 * This program uses Xerces-C++ DOM parser to load a set of schema files
11 * and then to validate a set of XML documents against these schemas. To
12 * build this program you will need Xerces-C++ 3.0.0 or later. For more
15 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
18 * Adaptation by Keith Marshall <keithmarshall@users.sourceforge.net>
19 * Copyright (C) 2013, MinGW.org Project
21 * This is free software. Permission is granted to copy, modify and
22 * redistribute this software, under the provisions of the GNU General
23 * Public License, Version 3, (or, at your option, any later version),
24 * as published by the Free Software Foundation; see the file COPYING
25 * for licensing details.
27 * Note, in particular, that this software is provided "as is", in the
28 * hope that it may prove useful, but WITHOUT WARRANTY OF ANY KIND; not
29 * even an implied WARRANTY OF MERCHANTABILITY, nor of FITNESS FOR ANY
30 * PARTICULAR PURPOSE. Under no circumstances will the author, or the
31 * MinGW Project, accept liability for any damages, however caused,
32 * arising from the use of this software.
35 #define __STDC_FORMAT_MACROS 1
36 #include <inttypes.h> /* for PRIu64 */
38 #include <cstdio> /* for fprintf() */
40 #include <memory> /* for std::auto_ptr */
41 #include <cstddef> /* for std::size_t */
43 #include <libgen.h> /* for basename() */
45 #include <xercesc/util/XMLUni.hpp>
46 #include <xercesc/util/XMLString.hpp>
47 #include <xercesc/util/PlatformUtils.hpp>
49 #include <xercesc/dom/DOM.hpp>
51 #include <xercesc/validators/common/Grammar.hpp>
52 #include <xercesc/framework/XMLGrammarPoolImpl.hpp>
55 using namespace xercesc;
57 #if _XERCES_VERSION < 30000
58 /* We need at least Xerces-C++ version 3.0.0
   * The directive below stops compilation for any older version.
60 # error Xerces-C++ version >= 3.0.0 is required!
62 #elif _XERCES_VERSION >= 30100
63 /* We may wish to exploit some features which were not introduced
64 * until Xerces-C++ version 3.1.0
   * Here the macro passes its STATEMENT argument through unchanged,
   * so that the wrapped statement is compiled.
66 # define IF_XERCES_30100_PLUS( STATEMENT ) STATEMENT
69 /* We cannot use Xerces-C++ version 3.1.0 features; make them no-op.
   * Here the macro expands to nothing, so that the wrapped statement
   * is discarded.
71 # define IF_XERCES_30100_PLUS( STATEMENT )
74 class error_handler: public DOMErrorHandler
76 /* A locally defined class for capture of fault conditions, as
77 * reported by our DOM parsers.
82 error_handler( const char *rel): source(rel),
83 first_report(true), new_document(true), failed(false){}
85 /* Method to access recorded error condition status.
87 bool has_failed() const { return failed; }
89 /* Method to reset recorded status, in preparation for
90 * parsing a new document.
   * Only "new_document" and "failed" are reset; "first_report" is
   * left untouched, so it persists from one document to the next.
92 void reset(){ new_document = true; failed = false; }
94 /* Method to handle error conditions, on behalf of our
97 virtual bool handleError( const xercesc::DOMError& );
100 /* The type of XML input being parsed, recorded when we
101 * construct the error handler for binding to a particular
106 /* Recording for error condition status.
    * "first_report" remains true until the first report header has
    * been emitted; "new_document" remains true until a header has
    * been emitted for the current input file; "failed" records that
    * a condition more severe than a warning has been reported.
108 bool first_report, new_document, failed;
112 error_handler::handleError( const xercesc::DOMError& condition )
114 /* Implementation of the error handler, which we will use to capture
115 * status, and report abnormal conditions detected by our DOM parsers.
117 bool warn = condition.getSeverity() == DOMError::DOM_SEVERITY_WARNING;
119 /* Record detection of any condition which is more severe than
122 if( ! warn ) failed = true;
124 /* Identify the location, within the current XML schema or document
125 * file, where the abnormality has been detected.
    * NOTE(review): "loc" is dereferenced below without a NULL check;
    * presumably getLocation() always yields a locator here -- confirm.
127 DOMLocator* loc( condition.getLocation() );
129 /* When this is the first abnormality detected within the current
130 * XML schema or document file...
134 /* ...but we've previously reported abnormalities within another
135 * input file, then separate the current report from diagnostics
136 * relating to that other file...
138 if( ! first_report ) fputc( '\n', stderr );
140 /* ...then, regardless of whatever may have gone before, format
141 * and emit a report header to identify the current file.
    * The transcoded URI is a temporary copy, which is released as
    * soon as it has been printed.
143 char *uri = XMLString::transcode( loc->getURI() );
144 fprintf( stderr, "Problem Report:\n%s: %s\n", source, uri );
145 XMLString::release( &uri );
147 /* Record that we've now emitted a report header and diagnostic
148 * for the current XML input file.
150 first_report = new_document = false;
153 /* Whether we added a new report header, or not, we still have a
154 * diagnostic message to emit.
    * NOTE(review): the PRIu64 conversions assume that getLineNumber()
    * and getColumnNumber() yield 64-bit unsigned values -- confirm
    * against the Xerces-C++ headers in use.
156 char* msg = XMLString::transcode( condition.getMessage() );
157 fprintf( stderr, "%" PRIu64 ":%" PRIu64 ": %s: %s\n", loc->getLineNumber(),
158 loc->getColumnNumber(), warn ? "WARNING" : "ERROR", msg
160 XMLString::release( &msg );
162 /* Finally, we return "true" to tell the DOM parser that we've
163 * handled the error, and that it should continue parsing.
169 insufficient_arguments( bool status, const char *program_pathname )
171 /* Diagnostic routine to report a lack of any command arguments
172 * to specify the XML documents which are to be validated.
176 /* The "status" flag indicates an abnormal condition...
178 * We want to call "basename()" on the passed "program_pathname";
179 * while this is likely safe, it MAY try to modify the input string,
180 * so create a temporary working copy...
    * NOTE(review): an array with a size computed at run time is not
    * standard C++; this presumably relies on a compiler extension.
182 char progname[1 + strlen( program_pathname )];
184 /* ...then format and emit an appropriate diagnostic message.
186 strcpy( progname, program_pathname );
187 fprintf( stderr, "%s: no XML documents specified for validation\n"
188 "usage: %s [schema.xsd ...] document.xml ...\n", basename( progname ),
192 /* Irrespective of condition, we echo back the input state.
198 create_parser( XMLGrammarPool* pool )
200 /* Helper function, to instantiate a DOM parser with "LS", (load and
201 * save), capability, (although we intend to use only "load").
203 const XMLCh ls_id[] = { chLatin_L, chLatin_S, chNull };
205 /* Locate a DOM implementation, providing the requisite "LS" feature.
    * NOTE(review): "impl" is used below without a NULL check; presumably
    * an "LS" capable implementation is always registered -- confirm.
207 DOMImplementation* impl(
208 DOMImplementationRegistry::getDOMImplementation( ls_id ) );
210 /* Instantiate a parser, based on this DOM implementation.
213 impl->createLSParser(
214 DOMImplementationLS::MODE_SYNCHRONOUS,
216 XMLPlatformUtils::fgMemoryManager,
219 /* Retrieve a pointer to its configuration data...
221 DOMConfiguration* conf( parser->getDomConfig() );
223 /* ...so we may apply this commonly useful configuration.
225 conf->setParameter( XMLUni::fgDOMComments, false );
226 conf->setParameter( XMLUni::fgDOMDatatypeNormalization, true );
227 conf->setParameter( XMLUni::fgDOMElementContentWhitespace, false );
228 conf->setParameter( XMLUni::fgDOMNamespaces, true );
229 conf->setParameter( XMLUni::fgDOMEntities, false );
231 /* Enable validation.
    * Schema processing is switched on, but full schema constraint
    * checking is left switched off.
233 conf->setParameter( XMLUni::fgDOMValidate, true );
234 conf->setParameter( XMLUni::fgXercesSchema, true );
235 conf->setParameter( XMLUni::fgXercesSchemaFullChecking, false );
237 /* Use the loaded grammar during parsing.
239 conf->setParameter( XMLUni::fgXercesUseCachedGrammarInParse, true );
241 /* Don't load schemas from any other source (e.g., from XML document's
242 * xsi:schemaLocation attributes).
244 conf->setParameter( XMLUni::fgXercesLoadSchema, false );
246 /* Xerces-C++ 3.1.0 is the first version with working support for
249 IF_XERCES_30100_PLUS(
250 conf->setParameter( XMLUni::fgXercesHandleMultipleImports, true )
253 /* We will release the DOM document ourselves.
255 conf->setParameter( XMLUni::fgXercesUserAdoptsDOMDocument, true );
257 /* Return a pointer to the instantiated parser.
263 validation_status( int argc, char **argv )
267 /* Initialize a grammar pool, for use by our parser instances.
    * NOTE(review): std::auto_ptr was deprecated in C++11, and removed
    * in C++17; std::unique_ptr is the modern equivalent.
269 MemoryManager* mm( XMLPlatformUtils::fgMemoryManager );
270 auto_ptr<XMLGrammarPool> gp( new XMLGrammarPoolImpl( mm ) );
272 /* Load the schema definitions into the grammar pool.
276 /* Instantiate a parser for the schema definition file(s).
278 DOMLSParser* parser( create_parser( gp.get() ) );
280 /* Initialize an error handler for the schema context,
281 * and bind it to the schema file parser.
283 error_handler eh( "XML Schema" );
284 parser->getDomConfig()->setParameter( XMLUni::fgDOMErrorHandler, &eh );
286 /* Scan command arguments, left to right, to identify any XML schema
287 * files which we are expected to interpret.
    * A schema file is recognised by a case-insensitive ".xsd" suffix,
    * on a name which is longer than that suffix alone.
289 do { const char *source = argv[argind]; size_t extent = strlen( source );
290 if( (extent > 4) && (strcasecmp( source + extent - 4, ".xsd" ) == 0) )
292 /* We have a "*.xsd" file to parse; do so, loading the grammar...
294 if( !parser->loadGrammar( source, Grammar::SchemaGrammarType, true ) )
296 /* ...but complain, and bail out, if loading fails...
298 fprintf( stderr, "%s: error: unable to load\n", source );
301 if( eh.has_failed() )
303 * ...or if any schema parsing error was encountered.
308 /* We've exhausted the "*.xsd" file references; break out of
309 * the scanning loop, without further ceremony.
313 /* Continue for the next "*.xsd" file, if any, provided there
314 * have been no schema abnormalities detected thus far.
316 } while( (retcode == 0) && (++argind < argc) );
318 /* We're finished with our schema parser; release its resource pool.
323 /* Before proceeding to parse any XML documents, check that any
324 * specified XML schemas have been loaded successfully.
328 /* It's okay to proceed, but it would be pointless to do so...
330 if( insufficient_arguments( argind >= argc, *argv ) )
332 * ...when there are no remaining arguments to specify any
333 * XML documents for checking; in this case, bail out.
337 /* Lock the grammar pool. This is necessary if we plan to use the
338 * same grammar pool in multiple threads (this way we can reuse the
339 * same grammar in multiple parsers). Locking the pool disallows any
340 * modifications to the pool, such as an attempt by one of the threads
341 * to cache additional schemas.
345 /* Instantiate a new parser, to process the XML documents.
347 DOMLSParser* parser( create_parser( gp.get() ) );
349 /* Initialize an error handler for the XML document context,
350 * and bind it to the new parser.
352 error_handler eh( "XML Document" );
353 parser->getDomConfig()->setParameter( XMLUni::fgDOMErrorHandler, &eh );
355 /* Process all remaining arguments, as references to XML documents.
357 while( argind < argc )
359 /* Reset the error handler state, prior to loading each document.
362 DOMDocument* doc( parser->parseURI( argv[argind++] ) );
364 /* In this application, all we care about is that the document
365 * can be successfully read by our validating parser; if we did
366 * read it successfully, we have no further use for it, so we
367 * may simply set it aside.
369 if( doc ) doc->release();
371 /* If any error occurred, while parsing the current document,
372 * the error handler will have recorded it; we need to capture
373 * that state here, for our eventual return code.
375 if( eh.has_failed() ) retcode = 1;
377 /* When all specified documents have been validated, we are done
378 * with our parser, so we may release its resource pool.
382 /* Report back, with the cumulative status from XML document parsing.
388 main( int argc, char **argv )
390 /* Fewer than one argument, after the command verb itself,
391 * is not useful; complain, and bail out.
393 if( insufficient_arguments( argc < 2, *argv ) )
396 /* We must initialize Xerces-C++, before we can use it.
    * This has to precede the validation_status() call below, since
    * that function creates the grammar pool and the DOM parsers.
398 XMLPlatformUtils::Initialize();
400 /* Determine the validation status for all specified XML documents,
401 * with respect to any specified XML schema definitions.
403 int retcode = validation_status( argc, argv );
405 /* Shut down the Xerces-C++ subsystem, before returning the resultant
406 * validation status code to the operating system.
408 XMLPlatformUtils::Terminate();
412 /* $RCSfile$: end of file */