1 karl 1.31 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.13 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.32 //
|
19 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
33 // (david.dillard@veritas.com)
|
34 mike 1.13 //
35 //%/////////////////////////////////////////////////////////////////////////////
36
37 ////////////////////////////////////////////////////////////////////////////////
38 //
39 // XmlParser
40 //
|
41 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
42 // several rules for well-formed XML:
|
43 mike 1.13 //
|
44 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
45 mike 1.13 //
|
46 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
47 mike 1.13 //
|
48 david.dillard 1.32 // 2. Comments have the form:
|
49 mike 1.13 //
|
50 david.dillard 1.32 // <!-- blah blah blah -->
|
51 mike 1.13 //
|
52 david.dillard 1.32 // 3. The following entity references are supported:
|
53 mike 1.13 //
|
54 david.dillard 1.32 // &amp; - ampersand
55 // &lt; - less-than
56 // &gt; - greater-than
57 // &quot; - full quote
58 // &apos; - apostrophe
|
59 mike 1.13 //
|
60 kumpf 1.18 // as well as character (numeric) references:
61
62 // &#49; - decimal reference for character '1'
63 // &#x31; - hexadecimal reference for character '1'
64 //
|
65 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
66 mike 1.13 //
|
67 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
68 mike 1.13 //
|
69 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
70 mike 1.13 //
|
71 david.dillard 1.32 // <![CDATA[
72 // ...
73 // ]]>
|
74 mike 1.13 //
|
75 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
78 // XmlAttribute values must be delimited.
|
79 mike 1.13 //
|
80 david.dillard 1.32 // 8. <!DOCTYPE...>
|
81 mike 1.13 //
82 // TODO:
83 //
|
84 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
85 david.dillard 1.32 // Handle <!DOCTYPE...> sections which are complicated (containing
|
86 mike 1.13 // rules rather than references to files).
87 //
|
88 david.dillard 1.32 // Remove newlines from string literals:
|
89 mike 1.13 //
90 // Example: <xyz x="hello
|
91 david.dillard 1.32 // world">
|
92 mike 1.13 //
93 ////////////////////////////////////////////////////////////////////////////////
94
|
95 sage 1.14 #include <Pegasus/Common/Config.h>
|
96 mike 1.13 #include <cctype>
97 #include <cstdio>
98 #include <cstdlib>
99 #include <cstring>
100 #include "XmlParser.h"
101 #include "Logger.h"
|
102 chuck 1.19 #include "ExceptionRep.h"
|
103 mike 1.13
104 PEGASUS_NAMESPACE_BEGIN
105
106 #define PEGASUS_ARRAY_T XmlEntry
107 # include "ArrayImpl.h"
108 #undef PEGASUS_ARRAY_T
109
110
111 ////////////////////////////////////////////////////////////////////////////////
112 //
113 // Static helper functions
114 //
115 ////////////////////////////////////////////////////////////////////////////////
116
117 static void _printValue(const char* p)
118 {
119 for (; *p; p++)
120 {
|
121 david.dillard 1.32 if (*p == '\n')
122 PEGASUS_STD(cout) << "\\n";
123 else if (*p == '\r')
124 PEGASUS_STD(cout) << "\\r";
125 else if (*p == '\t')
126 PEGASUS_STD(cout) << "\\t";
127 else
128 PEGASUS_STD(cout) << *p;
|
129 mike 1.13 }
130 }
131
132 struct EntityReference
133 {
134 const char* match;
135 Uint32 length;
136 char replacement;
137 };
138
|
139 kumpf 1.18 // ATTN: Add support for more entity references
|
140 mike 1.13 static EntityReference _references[] =
141 {
142 { "&", 5, '&' },
143 { "<", 4, '<' },
144 { ">", 4, '>' },
145 { """, 6, '"' },
146 { "'", 6, '\'' }
147 };
148
|
149 chuck 1.26
// Implements a check for a whitespace character, without calling
// isspace( ). The isspace( ) function is locale-sensitive,
// and incorrectly flags some chars above 0x7f as whitespace. This
// causes the XmlParser to incorrectly parse UTF-8 data.
//
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
// defines white space as:
//     S ::= (#x20 | #x9 | #xD | #xA)+
//
// Returns 1 if c is a space, tab, carriage return or line feed, else 0.
static int _isspace(char c)
{
    return (c == ' ' || c == '\r' || c == '\t' || c == '\n') ? 1 : 0;
}
164
165
|
166 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
167
168 // Remove all redundant spaces from the given string:
169
170 static void _normalize(char* text)
171 {
172 char* p = text;
|
173 david.dillard 1.32 char* end = p + strlen(text);
|
174 mike 1.13
175 // Remove leading spaces:
176
|
177 chuck 1.26 while (_isspace(*p))
|
178 david.dillard 1.32 p++;
|
179 mike 1.13
180 if (p != text)
|
181 david.dillard 1.32 memmove(text, p, end - p + 1);
|
182 mike 1.13
183 p = text;
184
185 // Look for sequences of more than one space and remove all but one.
186
187 for (;;)
188 {
|
189 david.dillard 1.32 // Advance to the next space:
|
190 mike 1.13
|
191 david.dillard 1.32 while (*p && !_isspace(*p))
192 p++;
|
193 mike 1.13
|
194 david.dillard 1.32 if (!*p)
195 break;
|
196 mike 1.13
|
197 david.dillard 1.32 // Advance to the next non-space:
|
198 mike 1.13
|
199 david.dillard 1.32 char* q = p++;
|
200 mike 1.13
|
201 david.dillard 1.32 while (_isspace(*p))
202 p++;
|
203 mike 1.13
|
204 david.dillard 1.32 // Discard trailing spaces (if we are at the end):
|
205 mike 1.13
|
206 david.dillard 1.32 if (!*p)
207 {
208 *q = '\0';
209 break;
210 }
|
211 mike 1.13
|
212 david.dillard 1.32 // Remove the redundant spaces:
|
213 mike 1.13
|
214 david.dillard 1.32 const size_t n = p - q;
|
215 mike 1.13
|
216 david.dillard 1.32 if (n > 1)
217 {
218 *q++ = ' ';
219 memmove(q, p, end - p + 1);
220 p = q;
221 }
|
222 mike 1.13 }
223 }
224
225 ////////////////////////////////////////////////////////////////////////////////
226 //
227 // XmlException
228 //
229 ////////////////////////////////////////////////////////////////////////////////
230
// Default (untranslated) message text for each XML error. The entries are
// indexed by (code - 1) in _formMessage() and _formPartialMessage(), and
// are in the same order as the message keys in _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
251
|
// Message keys for each XML error, passed to MessageLoaderParms. The
// entries are indexed by (code - 1) and are in the same order as the
// default texts in _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
272
|
273 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
274 chuck 1.19 /*
|
275 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
276 {
277 String result = _xmlMessages[Uint32(code) - 1];
278
279 char buffer[32];
280 sprintf(buffer, "%d", line);
281 result.append(": on line ");
282 result.append(buffer);
283
284 if (message.size())
285 {
|
286 david.dillard 1.32 result.append(": ");
287 result.append(message);
|
288 mike 1.13 }
289
290 return result;
291 }
|
292 chuck 1.19 */
293
294 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
295 {
296 String dftMsg = _xmlMessages[Uint32(code) - 1];
297 String key = _xmlKeys[Uint32(code) - 1];
|
298 david.dillard 1.32 String msg = message;
|
299 chuck 1.19
300 dftMsg.append(": on line $0");
301 if (message.size())
302 {
|
303 david.dillard 1.32 msg = ": " + msg;
304 dftMsg.append("$1");
305 }
|
306 chuck 1.19
307 return MessageLoaderParms(key, dftMsg, line ,msg);
308 }
309
310 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
311 {
312 String dftMsg = _xmlMessages[Uint32(code) - 1];
313 String key = _xmlKeys[Uint32(code) - 1];
314
315 dftMsg.append(": on line $0");
|
316 david.dillard 1.32
|
317 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
318 }
319
|
320 mike 1.13
321 XmlException::XmlException(
|
322 david.dillard 1.32 XmlException::Code code,
|
323 mike 1.13 Uint32 lineNumber,
|
324 david.dillard 1.32 const String& message)
|
325 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
326 {
327
328 }
329
|
330 chuck 1.19
331 XmlException::XmlException(
|
332 david.dillard 1.32 XmlException::Code code,
|
333 chuck 1.19 Uint32 lineNumber,
|
334 david.dillard 1.32 MessageLoaderParms& msgParms)
|
335 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
336 {
|
337 david.dillard 1.32 if (msgParms.default_msg.size())
|
338 humberto 1.21 {
|
339 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
340 }
341 _rep->message.append(MessageLoader::getMessage(msgParms));
|
342 chuck 1.19 }
343
344
|
345 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
346 //
347 // XmlValidationError
348 //
349 ////////////////////////////////////////////////////////////////////////////////
350
351 XmlValidationError::XmlValidationError(
352 Uint32 lineNumber,
353 const String& message)
354 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
355 {
356
357 }
358
|
359 chuck 1.19
360 XmlValidationError::XmlValidationError(
361 Uint32 lineNumber,
362 MessageLoaderParms& msgParms)
363 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
364 {
365
366 }
367
368
|
369 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
370 //
371 // XmlSemanticError
372 //
373 ////////////////////////////////////////////////////////////////////////////////
374
375 XmlSemanticError::XmlSemanticError(
376 Uint32 lineNumber,
377 const String& message)
378 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
379 {
380
381 }
|
382 chuck 1.19
383
384 XmlSemanticError::XmlSemanticError(
385 Uint32 lineNumber,
386 MessageLoaderParms& msgParms)
387 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
388 {
389
390 }
391
|
392 mike 1.13
393 ////////////////////////////////////////////////////////////////////////////////
394 //
395 // XmlParser
396 //
397 ////////////////////////////////////////////////////////////////////////////////
398
|
399 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
|
400 mike 1.13 _restoreChar('\0'), _foundRoot(false)
401 {
402
403 }
404
405 Boolean XmlParser::next(XmlEntry& entry)
406 {
407 if (!_putBackStack.isEmpty())
408 {
|
409 david.dillard 1.32 entry = _putBackStack.top();
410 _putBackStack.pop();
411 return true;
|
412 mike 1.13 }
413
414 // If a character was overwritten with a null-terminator the last
415 // time this routine was called, then put back that character. Before
416 // exiting of course, restore the null-terminator.
417
418 char* nullTerminator = 0;
419
420 if (_restoreChar && !*_current)
421 {
|
422 david.dillard 1.32 nullTerminator = _current;
423 *_current = _restoreChar;
424 _restoreChar = '\0';
|
425 mike 1.13 }
426
427 // Skip over any whitespace:
428
429 _skipWhitespace(_current);
430
431 if (!*_current)
432 {
|
433 david.dillard 1.32 if (nullTerminator)
434 *nullTerminator = '\0';
|
435 mike 1.13
|
436 david.dillard 1.32 if (!_stack.isEmpty())
437 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
438 mike 1.13
|
439 david.dillard 1.32 return false;
|
440 mike 1.13 }
441
442 // Either a "<...>" or content begins next:
443
444 if (*_current == '<')
445 {
|
446 david.dillard 1.32 _current++;
447 _getElement(_current, entry);
|
448 mike 1.13
|
449 david.dillard 1.32 if (nullTerminator)
450 *nullTerminator = '\0';
|
451 mike 1.13
|
452 david.dillard 1.32 if (entry.type == XmlEntry::START_TAG)
453 {
454 if (_stack.isEmpty() && _foundRoot)
455 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
456
457 _foundRoot = true;
458 _stack.push((char*)entry.text);
459 }
460 else if (entry.type == XmlEntry::END_TAG)
461 {
462 if (_stack.isEmpty())
463 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
464 mike 1.13
|
465 david.dillard 1.32 if (strcmp(_stack.top(), entry.text) != 0)
466 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
467 mike 1.13
|
468 david.dillard 1.32 _stack.pop();
469 }
|
470 mike 1.13
|
471 david.dillard 1.32 return true;
|
472 mike 1.13 }
473 else
474 {
|
475 david.dillard 1.32 entry.type = XmlEntry::CONTENT;
476 entry.text = _current;
477 _getContent(_current);
478 _restoreChar = *_current;
479 *_current = '\0';
|
480 mike 1.13
|
481 david.dillard 1.32 if (nullTerminator)
482 *nullTerminator = '\0';
|
483 mike 1.13
|
484 david.dillard 1.32 _substituteReferences((char*)entry.text);
485 _normalize((char*)entry.text);
|
486 mike 1.13
|
487 david.dillard 1.32 return true;
|
488 mike 1.13 }
489 }
490
491 void XmlParser::putBack(XmlEntry& entry)
492 {
493 _putBackStack.push(entry);
494 }
495
496 XmlParser::~XmlParser()
497 {
498 // Nothing to do!
499 }
500
501 void XmlParser::_skipWhitespace(char*& p)
502 {
|
503 chuck 1.26 while (*p && _isspace(*p))
|
504 mike 1.13 {
|
505 david.dillard 1.32 if (*p == '\n')
506 _line++;
|
507 mike 1.13
|
508 david.dillard 1.32 p++;
|
509 mike 1.13 }
510 }
511
512 Boolean XmlParser::_getElementName(char*& p)
513 {
|
514 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
515 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
516 (*p == '_')))
|
517 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
518 kumpf 1.24 p++;
|
519 mike 1.13
|
520 david 1.22 while ((*p) &&
|
521 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
522 ((*p >= 'a') && (*p <= 'z')) ||
523 ((*p >= '0') && (*p <= '9')) ||
524 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
525 p++;
|
526 mike 1.13
527 // The next character must be a space:
528
|
529 chuck 1.26 if (_isspace(*p))
|
530 mike 1.13 {
|
531 david.dillard 1.32 *p++ = '\0';
532 _skipWhitespace(p);
|
533 mike 1.13 }
534
535 if (*p == '>')
536 {
|
537 david.dillard 1.32 *p++ = '\0';
538 return true;
|
539 mike 1.13 }
540
541 return false;
542 }
543
544 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
545 {
546 openCloseElement = false;
547
|
548 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
549 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
550 (*p == '_')))
|
551 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
552 kumpf 1.24 p++;
|
553 mike 1.13
|
554 david 1.22 while ((*p) &&
|
555 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
556 ((*p >= 'a') && (*p <= 'z')) ||
557 ((*p >= '0') && (*p <= '9')) ||
558 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
559 p++;
|
560 mike 1.13
561 // The next character must be a space:
562
|
563 chuck 1.26 if (_isspace(*p))
|
564 mike 1.13 {
|
565 david.dillard 1.32 *p++ = '\0';
566 _skipWhitespace(p);
|
567 mike 1.13 }
568
569 if (*p == '>')
570 {
|
571 david.dillard 1.32 *p++ = '\0';
572 return true;
|
573 mike 1.13 }
574
575 if (p[0] == '/' && p[1] == '>')
576 {
|
577 david.dillard 1.32 openCloseElement = true;
578 *p = '\0';
579 p += 2;
580 return true;
|
581 mike 1.13 }
582
583 return false;
584 }
585
586 void XmlParser::_getAttributeNameAndEqual(char*& p)
587 {
|
588 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
589 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
590 (*p == '_')))
|
591 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
592 kumpf 1.24 p++;
|
593 mike 1.13
|
594 david 1.22 while ((*p) &&
|
595 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
596 ((*p >= 'a') && (*p <= 'z')) ||
597 ((*p >= '0') && (*p <= '9')) ||
598 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
599 p++;
|
600 mike 1.13
601 char* term = p;
602
603 _skipWhitespace(p);
604
605 if (*p != '=')
|
606 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
607 mike 1.13
608 p++;
609
610 _skipWhitespace(p);
611
612 *term = '\0';
613 }
614
615 void XmlParser::_getAttributeValue(char*& p)
616 {
617 // ATTN-B: handle values contained in semiquotes:
618
619 if (*p != '"' && *p != '\'')
|
620 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
|
621 mike 1.13
622 char startChar = *p++;
623
624 while (*p && *p != startChar)
|
625 david.dillard 1.32 p++;
|
626 mike 1.13
627 if (*p != startChar)
|
628 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
|
629 mike 1.13
630 *p++ = '\0';
631 }
632
633 void XmlParser::_getComment(char*& p)
634 {
635 // Now p points to first non-whitespace character beyond "<--" sequence:
636
637 for (; *p; p++)
638 {
|
639 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
640 {
641 if (p[2] != '>')
642 {
643 throw XmlException(
644 XmlException::MINUS_MINUS_IN_COMMENT, _line);
645 }
646
647 // Find end of comment (excluding whitespace):
648
649 *p = '\0';
650 p += 3;
651 return;
652 }
|
653 mike 1.13 }
654
655 // If it got this far, then the comment is unterminated:
656
657 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
658 }
659
660 void XmlParser::_getCData(char*& p)
661 {
662 // At this point p points one past "<![CDATA[" sequence:
663
664 for (; *p; p++)
665 {
|
666 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
667 {
668 *p = '\0';
669 p += 3;
670 return;
671 }
672 else if (*p == '\n')
673 _line++;
|
674 mike 1.13 }
675
676 // If it got this far, then the comment is unterminated:
677
678 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
679 }
680
681 void XmlParser::_getDocType(char*& p)
682 {
683 // Just ignore the DOCTYPE command for now:
684
685 for (; *p && *p != '>'; p++)
686 {
|
687 david.dillard 1.32 if (*p == '\n')
688 _line++;
|
689 mike 1.13 }
690
691 if (*p != '>')
|
692 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
693 mike 1.13
694 p++;
695 }
696
697 void XmlParser::_getContent(char*& p)
698 {
699 while (*p && *p != '<')
700 {
|
701 david.dillard 1.32 if (*p == '\n')
702 _line++;
|
703 mike 1.13
|
704 david.dillard 1.32 p++;
|
705 mike 1.13 }
706 }
707
708 void XmlParser::_substituteReferences(char* text)
709 {
|
710 david.dillard 1.32 size_t rem = strlen(text);
|
711 mike 1.13
712 for (char* p = text; *p; p++, rem--)
713 {
|
714 david.dillard 1.32 if (*p == '&')
715 {
|
716 kumpf 1.18 // Process character or entity reference
|
717 mike 1.13
|
718 kumpf 1.18 Uint16 referenceChar = 0;
719 Uint32 referenceLength = 0;
720 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
721
722 if (*(p+1) == '#')
723 {
724 // Found a character (numeric) reference
725 // Determine whether it is decimal or hex
726 if (*(p+2) == 'x')
727 {
728 // Decode a hexadecimal character reference
729 char* q = p+3;
730
731 // At most four digits are allowed, plus trailing ';'
732 Uint32 numDigits;
733 for (numDigits = 0; numDigits < 5; numDigits++, q++)
734 {
735 if (isdigit(*q))
736 {
737 referenceChar = (referenceChar << 4);
738 referenceChar += (*q - '0');
739 kumpf 1.18 }
740 else if ((*q >= 'A') && (*q <= 'F'))
741 {
742 referenceChar = (referenceChar << 4);
743 referenceChar += (*q - 'A' + 10);
744 }
745 else if ((*q >= 'a') && (*q <= 'f'))
746 {
747 referenceChar = (referenceChar << 4);
748 referenceChar += (*q - 'a' + 10);
749 }
750 else if (*q == ';')
751 {
752 break;
753 }
754 else
755 {
756 throw XmlException(code, _line);
757 }
758 }
759
760 kumpf 1.18 // Hex number must be 1 - 4 digits
761 if ((numDigits == 0) || (numDigits > 4))
762 {
763 throw XmlException(code, _line);
764 }
765
766 // ATTN: Currently do not support 16-bit characters
767 if (referenceChar > 0xff)
768 {
769 // ATTN: Is there a good way to say "unsupported"?
770 throw XmlException(code, _line);
771 }
772
773 referenceLength = numDigits + 4;
774 }
775 else
776 {
777 // Decode a decimal character reference
778 Uint32 newChar = 0;
779 char* q = p+2;
780
781 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
782 Uint32 numDigits;
783 for (numDigits = 0; numDigits < 6; numDigits++, q++)
784 {
785 if (isdigit(*q))
786 {
787 newChar = (newChar * 10);
788 newChar += (*q - '0');
789 }
790 else if (*q == ';')
791 {
792 break;
793 }
794 else
795 {
796 throw XmlException(code, _line);
797 }
798 }
799
800 // Decimal number must be 1 - 5 digits and fit in 16 bits
801 if ((numDigits == 0) || (numDigits > 5) ||
802 kumpf 1.18 (newChar > 0xffff))
803 {
804 throw XmlException(code, _line);
805 }
806
807 // ATTN: Currently do not support 16-bit characters
808 if (newChar > 0xff)
809 {
810 // ATTN: Is there a good way to say "unsupported"?
811 throw XmlException(code, _line);
812 }
813
814 referenceChar = Uint16(newChar);
815 referenceLength = numDigits + 3;
816 }
817 }
818 else
819 {
820 // Check for entity reference
821 // ATTN: Inefficient if many entity references are supported
822 Uint32 i;
823 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
824 {
825 Uint32 length = _references[i].length;
826 const char* match = _references[i].match;
827
828 if (strncmp(p, _references[i].match, length) == 0)
829 {
830 referenceChar = _references[i].replacement;
831 referenceLength = length;
832 break;
833 }
834 }
835
836 if (i == _REFERENCES_SIZE)
837 {
838 // Didn't recognize the entity reference
839 // ATTN: Is there a good way to say "unsupported"?
840 throw XmlException(code, _line);
841 }
842 }
843
844 kumpf 1.18 // Replace the reference with the correct character
845 *p = (char)referenceChar;
846 char* q = p + referenceLength;
847 rem = rem - referenceLength + 1;
848 memmove(p + 1, q, rem);
|
849 david.dillard 1.32 }
|
850 mike 1.13 }
851 }
852
// Empty text used for entries that carry no text of their own (DOCTYPE).
static const char _EMPTY_STRING[] = "";
854
855 void XmlParser::_getElement(char*& p, XmlEntry& entry)
856 {
857 entry.attributeCount = 0;
858
859 //--------------------------------------------------------------------------
860 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
861 //--------------------------------------------------------------------------
862
863 if (*p == '?')
864 {
|
865 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
866 entry.text = ++p;
|
867 mike 1.13
|
868 david.dillard 1.32 Boolean openCloseElement = false;
|
869 mike 1.13
|
870 david.dillard 1.32 if (_getElementName(p))
871 return;
|
872 mike 1.13 }
873 else if (*p == '!')
874 {
|
875 david.dillard 1.32 p++;
|
876 mike 1.13
|
877 david.dillard 1.32 // Expect a comment or CDATA:
|
878 mike 1.13
|
879 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
880 {
881 p += 2;
882 entry.type = XmlEntry::COMMENT;
883 entry.text = p;
884 _getComment(p);
885 return;
886 }
887 else if (memcmp(p, "[CDATA[", 7) == 0)
888 {
889 p += 7;
890 entry.type = XmlEntry::CDATA;
891 entry.text = p;
892 _getCData(p);
893 return;
894 }
895 else if (memcmp(p, "DOCTYPE", 7) == 0)
896 {
897 entry.type = XmlEntry::DOCTYPE;
898 entry.text = _EMPTY_STRING;
899 _getDocType(p);
900 david.dillard 1.32 return;
901 }
902 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
903 mike 1.13 }
904 else if (*p == '/')
905 {
|
906 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
907 entry.text = ++p;
|
908 mike 1.13
|
909 david.dillard 1.32 if (!_getElementName(p))
910 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
911 mike 1.13
|
912 david.dillard 1.32 return;
|
913 mike 1.13 }
|
914 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
915 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
916 (*p == '_')))
|
917 mike 1.13 {
|
918 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
919 entry.text = p;
|
920 mike 1.13
|
921 david.dillard 1.32 Boolean openCloseElement = false;
|
922 mike 1.13
|
923 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
924 {
925 if (openCloseElement)
926 entry.type = XmlEntry::EMPTY_TAG;
927 return;
928 }
|
929 mike 1.13 }
930 else
|
931 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
932 mike 1.13
933 //--------------------------------------------------------------------------
934 // Grab all the attributes:
935 //--------------------------------------------------------------------------
936
937 for (;;)
938 {
|
939 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
940 {
941 if (p[0] == '?' && p[1] == '>')
942 {
943 p += 2;
944 return;
945 }
946 }
947 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
948 {
949 entry.type = XmlEntry::EMPTY_TAG;
950 p += 2;
951 return;
952 }
953 else if (*p == '>')
954 {
955 p++;
956 return;
957 }
958
959 XmlAttribute attr;
960 david.dillard 1.32 attr.name = p;
961 _getAttributeNameAndEqual(p);
962
963 if (*p != '"' && *p != '\'')
964 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
965
966 attr.value = p + 1;
967 _getAttributeValue(p);
968
969 if (entry.type == XmlEntry::XML_DECLARATION)
970 {
971 // The next thing must a space or a "?>":
|
972 mike 1.13
|
973 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
974 {
975 throw XmlException(
976 XmlException::BAD_ATTRIBUTE_VALUE, _line);
977 }
978 }
979 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
980 {
981 // The next thing must be a space or a '>':
|
982 mike 1.13
|
983 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
984 }
|
985 mike 1.13
|
986 david.dillard 1.32 _skipWhitespace(p);
987
988 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
989 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
990
991 _substituteReferences((char*)attr.value);
992 entry.attributes[entry.attributeCount++] = attr;
|
993 mike 1.13 }
994 }
995
// Printable names of the entry types, indexed by XmlEntry type value in
// XmlEntry::print().
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1007
1008 void XmlEntry::print() const
1009 {
1010 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1011
1012 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1013
1014 if (needQuotes)
|
1015 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1016
|
1017 mike 1.13 _printValue(text);
1018
1019 if (needQuotes)
|
1020 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1021 mike 1.13
1022 PEGASUS_STD(cout) << '\n';
1023
1024 for (Uint32 i = 0; i < attributeCount; i++)
1025 {
|
1026 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1027 _printValue(attributes[i].value);
1028 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1029 mike 1.13 }
1030 }
1031
1032 const XmlAttribute* XmlEntry::findAttribute(
1033 const char* name) const
1034 {
1035 for (Uint32 i = 0; i < attributeCount; i++)
1036 {
|
1037 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1038 return &attributes[i];
|
1039 mike 1.13 }
1040
1041 return 0;
1042 }
1043
1044 // Find first non-whitespace character (set first) and last non-whitespace
1045 // character (set last one past this). For example, consider this string:
1046 //
|
1047 david.dillard 1.32 // " 87 "
|
1048 mike 1.13 //
1049 // The first pointer would point to '8' and the last pointer woudl point one
1050 // beyond '7'.
1051
1052 static void _findEnds(
|
1053 david.dillard 1.32 const char* str,
1054 const char*& first,
|
1055 mike 1.13 const char*& last)
1056 {
1057 first = str;
1058
|
1059 chuck 1.26 while (_isspace(*first))
|
1060 david.dillard 1.32 first++;
|
1061 mike 1.13
1062 if (!*first)
1063 {
|
1064 david.dillard 1.32 last = first;
1065 return;
|
1066 mike 1.13 }
1067
1068 last = first + strlen(first);
1069
|
1070 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1071 david.dillard 1.32 last--;
|
1072 mike 1.13 }
1073
1074 Boolean XmlEntry::getAttributeValue(
|
1075 david.dillard 1.32 const char* name,
|
1076 mike 1.13 Uint32& value) const
1077 {
1078 const XmlAttribute* attr = findAttribute(name);
1079
1080 if (!attr)
|
1081 david.dillard 1.32 return false;
|
1082 mike 1.13
1083 const char* first;
1084 const char* last;
1085 _findEnds(attr->value, first, last);
1086
1087 char* end = 0;
1088 long tmp = strtol(first, &end, 10);
1089
1090 if (!end || end != last)
|
1091 david.dillard 1.32 return false;
|
1092 mike 1.13
1093 value = Uint32(tmp);
1094 return true;
1095 }
1096
1097 Boolean XmlEntry::getAttributeValue(
|
1098 david.dillard 1.32 const char* name,
|
1099 mike 1.13 Real32& value) const
1100 {
1101 const XmlAttribute* attr = findAttribute(name);
1102
1103 if (!attr)
|
1104 david.dillard 1.32 return false;
|
1105 mike 1.13
1106 const char* first;
1107 const char* last;
1108 _findEnds(attr->value, first, last);
1109
1110 char* end = 0;
1111 double tmp = strtod(first, &end);
1112
1113 if (!end || end != last)
|
1114 david.dillard 1.32 return false;
|
1115 mike 1.13
|
1116 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1117 mike 1.13 return true;
1118 }
1119
1120 Boolean XmlEntry::getAttributeValue(
|
1121 david.dillard 1.32 const char* name,
|
1122 mike 1.13 const char*& value) const
1123 {
1124 const XmlAttribute* attr = findAttribute(name);
1125
1126 if (!attr)
|
1127 david.dillard 1.32 return false;
|
1128 mike 1.13
1129 value = attr->value;
1130 return true;
1131 }
1132
1133 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1134 {
1135 const char* tmp;
1136
1137 if (!getAttributeValue(name, tmp))
|
1138 david.dillard 1.32 return false;
|
1139 mike 1.13
|
1140 chuck 1.28 value = String(tmp);
|
1141 mike 1.13 return true;
1142 }
1143
|
1144 david.dillard 1.30 void XmlAppendCString(Array<char>& out, const char* str)
|
1145 mike 1.13 {
|
1146 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1147 mike 1.13 }
1148
1149 PEGASUS_NAMESPACE_END
|