1 karl 1.31 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.13 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.32 //
|
19 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
33 // (david.dillard@veritas.com)
|
34 mike 1.13 //
35 //%/////////////////////////////////////////////////////////////////////////////
36
37 ////////////////////////////////////////////////////////////////////////////////
38 //
39 // XmlParser
40 //
|
41 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
42 // several rules for well-formed XML:
|
43 mike 1.13 //
|
44 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
45 mike 1.13 //
|
46 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
47 mike 1.13 //
|
48 david.dillard 1.32 // 2. Comments have the form:
|
49 mike 1.13 //
|
50 david.dillard 1.32 // <!-- blah blah blah -->
|
51 mike 1.13 //
|
52 david.dillard 1.32 // 3. The following entity references are supported:
|
53 mike 1.13 //
|
54 david.dillard 1.32 // &amp; - ampersand
55 // &lt; - less-than
56 // &gt; - greater-than
57 // &quot; - full quote
58 // &apos; - apostrophe
|
59 mike 1.13 //
|
60 kumpf 1.18 // as well as character (numeric) references:
61
62 // &#49; - decimal reference for character '1'
63 // &#x31; - hexadecimal reference for character '1'
64 //
|
65 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
66 mike 1.13 //
|
67 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
68 mike 1.13 //
|
69 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
70 mike 1.13 //
|
71 david.dillard 1.32 // <![CDATA[
72 // ...
73 // ]]>
|
74 mike 1.13 //
|
75 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
78 // XmlAttribute values must be delimited.
|
79 mike 1.13 //
|
80 david.dillard 1.32 // 8. <!DOCTYPE...>
|
81 mike 1.13 //
82 // TODO:
83 //
|
84 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
85 david.dillard 1.32 // Handle <!DOCTYPE...> sections which are complicated (containing
|
86 mike 1.13 // rules rather than references to files).
87 //
|
88 david.dillard 1.32 // Remove newlines from string literals:
|
89 mike 1.13 //
90 // Example: <xyz x="hello
|
91 david.dillard 1.32 // world">
|
92 mike 1.13 //
93 ////////////////////////////////////////////////////////////////////////////////
94
|
95 sage 1.14 #include <Pegasus/Common/Config.h>
|
96 mike 1.13 #include <cctype>
97 #include <cstdio>
98 #include <cstdlib>
99 #include <cstring>
100 #include "XmlParser.h"
101 #include "Logger.h"
|
102 chuck 1.19 #include "ExceptionRep.h"
|
103 mike 1.32.2.1 #include "CharSet.h"
|
104 mike 1.13
105 PEGASUS_NAMESPACE_BEGIN
106
107 #define PEGASUS_ARRAY_T XmlEntry
108 # include "ArrayImpl.h"
109 #undef PEGASUS_ARRAY_T
110
111
112 ////////////////////////////////////////////////////////////////////////////////
113 //
114 // Static helper functions
115 //
116 ////////////////////////////////////////////////////////////////////////////////
117
118 static void _printValue(const char* p)
119 {
120 for (; *p; p++)
121 {
|
122 david.dillard 1.32 if (*p == '\n')
123 PEGASUS_STD(cout) << "\\n";
124 else if (*p == '\r')
125 PEGASUS_STD(cout) << "\\r";
126 else if (*p == '\t')
127 PEGASUS_STD(cout) << "\\t";
128 else
129 PEGASUS_STD(cout) << *p;
|
130 mike 1.13 }
131 }
132
133 struct EntityReference
134 {
135 const char* match;
136 Uint32 length;
137 char replacement;
138 };
139
|
140 kumpf 1.18 // ATTN: Add support for more entity references
|
141 mike 1.13 static EntityReference _references[] =
142 {
143 { "&", 5, '&' },
144 { "<", 4, '<' },
145 { ">", 4, '>' },
146 { """, 6, '"' },
147 { "'", 6, '\'' }
148 };
149
|
150 chuck 1.26
151 // Implements a check for a whitespace character, without calling
152 // isspace( ). The isspace( ) function is locale-sensitive,
153 // and incorrectly flags some chars above 0x7f as whitespace. This
154 // causes the XmlParser to incorrectly parse UTF-8 data.
155 //
156 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
157 // defines white space as:
|
158 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
159 mike 1.32.2.1 static inline int _isspace(char c)
|
160 chuck 1.26 {
|
161 mike 1.32.2.2 return CharSet::is_space((Uint8)c);
|
162 chuck 1.26 }
163
|
164 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
165
166 // Remove all redundant spaces from the given string:
167
168 static void _normalize(char* text)
169 {
170 char* p = text;
|
171 david.dillard 1.32 char* end = p + strlen(text);
|
172 mike 1.13
173 // Remove leading spaces:
174
|
175 chuck 1.26 while (_isspace(*p))
|
176 david.dillard 1.32 p++;
|
177 mike 1.13
178 if (p != text)
|
179 david.dillard 1.32 memmove(text, p, end - p + 1);
|
180 mike 1.13
181 p = text;
182
183 // Look for sequences of more than one space and remove all but one.
184
185 for (;;)
186 {
|
187 david.dillard 1.32 // Advance to the next space:
|
188 mike 1.13
|
189 david.dillard 1.32 while (*p && !_isspace(*p))
190 p++;
|
191 mike 1.13
|
192 david.dillard 1.32 if (!*p)
193 break;
|
194 mike 1.13
|
195 david.dillard 1.32 // Advance to the next non-space:
|
196 mike 1.13
|
197 david.dillard 1.32 char* q = p++;
|
198 mike 1.13
|
199 david.dillard 1.32 while (_isspace(*p))
200 p++;
|
201 mike 1.13
|
202 david.dillard 1.32 // Discard trailing spaces (if we are at the end):
|
203 mike 1.13
|
204 david.dillard 1.32 if (!*p)
205 {
206 *q = '\0';
207 break;
208 }
|
209 mike 1.13
|
210 david.dillard 1.32 // Remove the redundant spaces:
|
211 mike 1.13
|
212 david.dillard 1.32 const size_t n = p - q;
|
213 mike 1.13
|
214 david.dillard 1.32 if (n > 1)
215 {
216 *q++ = ' ';
217 memmove(q, p, end - p + 1);
218 p = q;
219 }
|
220 mike 1.13 }
221 }
222
223 ////////////////////////////////////////////////////////////////////////////////
224 //
225 // XmlException
226 //
227 ////////////////////////////////////////////////////////////////////////////////
228
// Default (English) message text for each XML error. _formMessage() and
// _formPartialMessage() index this table with (code - 1), so the entries
// must stay in the same order as the keys in _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
249
|
// Message-bundle key for each XML error. _formMessage() and
// _formPartialMessage() index this table with (code - 1); entry i is the key
// for the default text at _xmlMessages[i].
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
270
|
271 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
272 chuck 1.19 /*
|
273 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
274 {
275 String result = _xmlMessages[Uint32(code) - 1];
276
277 char buffer[32];
278 sprintf(buffer, "%d", line);
279 result.append(": on line ");
280 result.append(buffer);
281
282 if (message.size())
283 {
|
284 david.dillard 1.32 result.append(": ");
285 result.append(message);
|
286 mike 1.13 }
287
288 return result;
289 }
|
290 chuck 1.19 */
291
292 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
293 {
294 String dftMsg = _xmlMessages[Uint32(code) - 1];
295 String key = _xmlKeys[Uint32(code) - 1];
|
296 david.dillard 1.32 String msg = message;
|
297 chuck 1.19
298 dftMsg.append(": on line $0");
299 if (message.size())
300 {
|
301 david.dillard 1.32 msg = ": " + msg;
302 dftMsg.append("$1");
303 }
|
304 chuck 1.19
305 return MessageLoaderParms(key, dftMsg, line ,msg);
306 }
307
308 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
309 {
310 String dftMsg = _xmlMessages[Uint32(code) - 1];
311 String key = _xmlKeys[Uint32(code) - 1];
312
313 dftMsg.append(": on line $0");
|
314 david.dillard 1.32
|
315 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
316 }
317
|
318 mike 1.13
319 XmlException::XmlException(
|
320 david.dillard 1.32 XmlException::Code code,
|
321 mike 1.13 Uint32 lineNumber,
|
322 david.dillard 1.32 const String& message)
|
323 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
324 {
325
326 }
327
|
328 chuck 1.19
329 XmlException::XmlException(
|
330 david.dillard 1.32 XmlException::Code code,
|
331 chuck 1.19 Uint32 lineNumber,
|
332 david.dillard 1.32 MessageLoaderParms& msgParms)
|
333 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
334 {
|
335 david.dillard 1.32 if (msgParms.default_msg.size())
|
336 humberto 1.21 {
|
337 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
338 }
339 _rep->message.append(MessageLoader::getMessage(msgParms));
|
340 chuck 1.19 }
341
342
|
343 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
344 //
345 // XmlValidationError
346 //
347 ////////////////////////////////////////////////////////////////////////////////
348
349 XmlValidationError::XmlValidationError(
350 Uint32 lineNumber,
351 const String& message)
352 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
353 {
354
355 }
356
|
357 chuck 1.19
358 XmlValidationError::XmlValidationError(
359 Uint32 lineNumber,
360 MessageLoaderParms& msgParms)
361 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
362 {
363
364 }
365
366
|
367 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
368 //
369 // XmlSemanticError
370 //
371 ////////////////////////////////////////////////////////////////////////////////
372
373 XmlSemanticError::XmlSemanticError(
374 Uint32 lineNumber,
375 const String& message)
376 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
377 {
378
379 }
|
380 chuck 1.19
381
382 XmlSemanticError::XmlSemanticError(
383 Uint32 lineNumber,
384 MessageLoaderParms& msgParms)
385 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
386 {
387
388 }
389
|
390 mike 1.13
391 ////////////////////////////////////////////////////////////////////////////////
392 //
393 // XmlParser
394 //
395 ////////////////////////////////////////////////////////////////////////////////
396
|
397 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
|
398 mike 1.13 _restoreChar('\0'), _foundRoot(false)
399 {
400
401 }
402
403 Boolean XmlParser::next(XmlEntry& entry)
404 {
405 if (!_putBackStack.isEmpty())
406 {
|
407 david.dillard 1.32 entry = _putBackStack.top();
408 _putBackStack.pop();
409 return true;
|
410 mike 1.13 }
411
412 // If a character was overwritten with a null-terminator the last
413 // time this routine was called, then put back that character. Before
414 // exiting of course, restore the null-terminator.
415
416 char* nullTerminator = 0;
417
418 if (_restoreChar && !*_current)
419 {
|
420 david.dillard 1.32 nullTerminator = _current;
421 *_current = _restoreChar;
422 _restoreChar = '\0';
|
423 mike 1.13 }
424
425 // Skip over any whitespace:
426
427 _skipWhitespace(_current);
428
429 if (!*_current)
430 {
|
431 david.dillard 1.32 if (nullTerminator)
432 *nullTerminator = '\0';
|
433 mike 1.13
|
434 david.dillard 1.32 if (!_stack.isEmpty())
435 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
436 mike 1.13
|
437 david.dillard 1.32 return false;
|
438 mike 1.13 }
439
440 // Either a "<...>" or content begins next:
441
442 if (*_current == '<')
443 {
|
444 david.dillard 1.32 _current++;
445 _getElement(_current, entry);
|
446 mike 1.13
|
447 david.dillard 1.32 if (nullTerminator)
448 *nullTerminator = '\0';
|
449 mike 1.13
|
450 david.dillard 1.32 if (entry.type == XmlEntry::START_TAG)
451 {
452 if (_stack.isEmpty() && _foundRoot)
453 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
454
455 _foundRoot = true;
456 _stack.push((char*)entry.text);
457 }
458 else if (entry.type == XmlEntry::END_TAG)
459 {
460 if (_stack.isEmpty())
461 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
462 mike 1.13
|
463 david.dillard 1.32 if (strcmp(_stack.top(), entry.text) != 0)
464 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
465 mike 1.13
|
466 david.dillard 1.32 _stack.pop();
467 }
|
468 mike 1.13
|
469 david.dillard 1.32 return true;
|
470 mike 1.13 }
471 else
472 {
|
473 david.dillard 1.32 entry.type = XmlEntry::CONTENT;
474 entry.text = _current;
475 _getContent(_current);
476 _restoreChar = *_current;
477 *_current = '\0';
|
478 mike 1.13
|
479 david.dillard 1.32 if (nullTerminator)
480 *nullTerminator = '\0';
|
481 mike 1.13
|
482 david.dillard 1.32 _substituteReferences((char*)entry.text);
483 _normalize((char*)entry.text);
|
484 mike 1.13
|
485 david.dillard 1.32 return true;
|
486 mike 1.13 }
487 }
488
489 void XmlParser::putBack(XmlEntry& entry)
490 {
491 _putBackStack.push(entry);
492 }
493
494 XmlParser::~XmlParser()
495 {
496 // Nothing to do!
497 }
498
499 void XmlParser::_skipWhitespace(char*& p)
500 {
|
501 chuck 1.26 while (*p && _isspace(*p))
|
502 mike 1.13 {
|
503 david.dillard 1.32 if (*p == '\n')
504 _line++;
|
505 mike 1.13
|
506 david.dillard 1.32 p++;
|
507 mike 1.13 }
508 }
509
510 Boolean XmlParser::_getElementName(char*& p)
511 {
|
512 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
513 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
514 (*p == '_')))
|
515 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
516 kumpf 1.24 p++;
|
517 mike 1.13
|
518 david 1.22 while ((*p) &&
|
519 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
520 ((*p >= 'a') && (*p <= 'z')) ||
521 ((*p >= '0') && (*p <= '9')) ||
522 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
523 p++;
|
524 mike 1.13
525 // The next character must be a space:
526
|
527 chuck 1.26 if (_isspace(*p))
|
528 mike 1.13 {
|
529 david.dillard 1.32 *p++ = '\0';
530 _skipWhitespace(p);
|
531 mike 1.13 }
532
533 if (*p == '>')
534 {
|
535 david.dillard 1.32 *p++ = '\0';
536 return true;
|
537 mike 1.13 }
538
539 return false;
540 }
541
542 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
543 {
544 openCloseElement = false;
545
|
546 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
547 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
548 (*p == '_')))
|
549 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
550 kumpf 1.24 p++;
|
551 mike 1.13
|
552 david 1.22 while ((*p) &&
|
553 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
554 ((*p >= 'a') && (*p <= 'z')) ||
555 ((*p >= '0') && (*p <= '9')) ||
556 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
557 p++;
|
558 mike 1.13
559 // The next character must be a space:
560
|
561 chuck 1.26 if (_isspace(*p))
|
562 mike 1.13 {
|
563 david.dillard 1.32 *p++ = '\0';
564 _skipWhitespace(p);
|
565 mike 1.13 }
566
567 if (*p == '>')
568 {
|
569 david.dillard 1.32 *p++ = '\0';
570 return true;
|
571 mike 1.13 }
572
573 if (p[0] == '/' && p[1] == '>')
574 {
|
575 david.dillard 1.32 openCloseElement = true;
576 *p = '\0';
577 p += 2;
578 return true;
|
579 mike 1.13 }
580
581 return false;
582 }
583
584 void XmlParser::_getAttributeNameAndEqual(char*& p)
585 {
|
586 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
587 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
588 (*p == '_')))
|
589 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
590 kumpf 1.24 p++;
|
591 mike 1.13
|
592 david 1.22 while ((*p) &&
|
593 david.dillard 1.32 (((*p >= 'A') && (*p <= 'Z')) ||
594 ((*p >= 'a') && (*p <= 'z')) ||
595 ((*p >= '0') && (*p <= '9')) ||
596 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
597 p++;
|
598 mike 1.13
599 char* term = p;
600
601 _skipWhitespace(p);
602
603 if (*p != '=')
|
604 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
605 mike 1.13
606 p++;
607
608 _skipWhitespace(p);
609
610 *term = '\0';
611 }
612
613 void XmlParser::_getAttributeValue(char*& p)
614 {
615 // ATTN-B: handle values contained in semiquotes:
616
617 if (*p != '"' && *p != '\'')
|
618 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
|
619 mike 1.13
620 char startChar = *p++;
621
622 while (*p && *p != startChar)
|
623 david.dillard 1.32 p++;
|
624 mike 1.13
625 if (*p != startChar)
|
626 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
|
627 mike 1.13
628 *p++ = '\0';
629 }
630
631 void XmlParser::_getComment(char*& p)
632 {
633 // Now p points to first non-whitespace character beyond "<--" sequence:
634
635 for (; *p; p++)
636 {
|
637 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
638 {
639 if (p[2] != '>')
640 {
641 throw XmlException(
642 XmlException::MINUS_MINUS_IN_COMMENT, _line);
643 }
644
645 // Find end of comment (excluding whitespace):
646
647 *p = '\0';
648 p += 3;
649 return;
650 }
|
651 mike 1.13 }
652
653 // If it got this far, then the comment is unterminated:
654
655 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
656 }
657
658 void XmlParser::_getCData(char*& p)
659 {
660 // At this point p points one past "<![CDATA[" sequence:
661
662 for (; *p; p++)
663 {
|
664 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
665 {
666 *p = '\0';
667 p += 3;
668 return;
669 }
670 else if (*p == '\n')
671 _line++;
|
672 mike 1.13 }
673
674 // If it got this far, then the comment is unterminated:
675
676 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
677 }
678
679 void XmlParser::_getDocType(char*& p)
680 {
681 // Just ignore the DOCTYPE command for now:
682
683 for (; *p && *p != '>'; p++)
684 {
|
685 david.dillard 1.32 if (*p == '\n')
686 _line++;
|
687 mike 1.13 }
688
689 if (*p != '>')
|
690 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
691 mike 1.13
692 p++;
693 }
694
695 void XmlParser::_getContent(char*& p)
696 {
697 while (*p && *p != '<')
698 {
|
699 david.dillard 1.32 if (*p == '\n')
700 _line++;
|
701 mike 1.13
|
702 david.dillard 1.32 p++;
|
703 mike 1.13 }
704 }
705
706 void XmlParser::_substituteReferences(char* text)
707 {
|
708 david.dillard 1.32 size_t rem = strlen(text);
|
709 mike 1.13
710 for (char* p = text; *p; p++, rem--)
711 {
|
712 david.dillard 1.32 if (*p == '&')
713 {
|
714 kumpf 1.18 // Process character or entity reference
|
715 mike 1.13
|
716 kumpf 1.18 Uint16 referenceChar = 0;
717 Uint32 referenceLength = 0;
718 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
719
720 if (*(p+1) == '#')
721 {
722 // Found a character (numeric) reference
723 // Determine whether it is decimal or hex
724 if (*(p+2) == 'x')
725 {
726 // Decode a hexadecimal character reference
727 char* q = p+3;
728
729 // At most four digits are allowed, plus trailing ';'
730 Uint32 numDigits;
731 for (numDigits = 0; numDigits < 5; numDigits++, q++)
732 {
733 if (isdigit(*q))
734 {
735 referenceChar = (referenceChar << 4);
736 referenceChar += (*q - '0');
737 kumpf 1.18 }
738 else if ((*q >= 'A') && (*q <= 'F'))
739 {
740 referenceChar = (referenceChar << 4);
741 referenceChar += (*q - 'A' + 10);
742 }
743 else if ((*q >= 'a') && (*q <= 'f'))
744 {
745 referenceChar = (referenceChar << 4);
746 referenceChar += (*q - 'a' + 10);
747 }
748 else if (*q == ';')
749 {
750 break;
751 }
752 else
753 {
754 throw XmlException(code, _line);
755 }
756 }
757
758 kumpf 1.18 // Hex number must be 1 - 4 digits
759 if ((numDigits == 0) || (numDigits > 4))
760 {
761 throw XmlException(code, _line);
762 }
763
764 // ATTN: Currently do not support 16-bit characters
765 if (referenceChar > 0xff)
766 {
767 // ATTN: Is there a good way to say "unsupported"?
768 throw XmlException(code, _line);
769 }
770
771 referenceLength = numDigits + 4;
772 }
773 else
774 {
775 // Decode a decimal character reference
776 Uint32 newChar = 0;
777 char* q = p+2;
778
779 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
780 Uint32 numDigits;
781 for (numDigits = 0; numDigits < 6; numDigits++, q++)
782 {
783 if (isdigit(*q))
784 {
785 newChar = (newChar * 10);
786 newChar += (*q - '0');
787 }
788 else if (*q == ';')
789 {
790 break;
791 }
792 else
793 {
794 throw XmlException(code, _line);
795 }
796 }
797
798 // Decimal number must be 1 - 5 digits and fit in 16 bits
799 if ((numDigits == 0) || (numDigits > 5) ||
800 kumpf 1.18 (newChar > 0xffff))
801 {
802 throw XmlException(code, _line);
803 }
804
805 // ATTN: Currently do not support 16-bit characters
806 if (newChar > 0xff)
807 {
808 // ATTN: Is there a good way to say "unsupported"?
809 throw XmlException(code, _line);
810 }
811
812 referenceChar = Uint16(newChar);
813 referenceLength = numDigits + 3;
814 }
815 }
816 else
817 {
818 // Check for entity reference
819 // ATTN: Inefficient if many entity references are supported
820 Uint32 i;
821 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
822 {
823 Uint32 length = _references[i].length;
824 const char* match = _references[i].match;
825
826 if (strncmp(p, _references[i].match, length) == 0)
827 {
828 referenceChar = _references[i].replacement;
829 referenceLength = length;
830 break;
831 }
832 }
833
834 if (i == _REFERENCES_SIZE)
835 {
836 // Didn't recognize the entity reference
837 // ATTN: Is there a good way to say "unsupported"?
838 throw XmlException(code, _line);
839 }
840 }
841
842 kumpf 1.18 // Replace the reference with the correct character
843 *p = (char)referenceChar;
844 char* q = p + referenceLength;
845 rem = rem - referenceLength + 1;
846 memmove(p + 1, q, rem);
|
847 david.dillard 1.32 }
|
848 mike 1.13 }
849 }
850
// Empty text assigned to DOCTYPE entries by XmlParser::_getElement().
static const char _EMPTY_STRING[] = { '\0' };
852
853 void XmlParser::_getElement(char*& p, XmlEntry& entry)
854 {
855 entry.attributeCount = 0;
856
857 //--------------------------------------------------------------------------
858 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
859 //--------------------------------------------------------------------------
860
861 if (*p == '?')
862 {
|
863 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
864 entry.text = ++p;
|
865 mike 1.13
|
866 david.dillard 1.32 Boolean openCloseElement = false;
|
867 mike 1.13
|
868 david.dillard 1.32 if (_getElementName(p))
869 return;
|
870 mike 1.13 }
871 else if (*p == '!')
872 {
|
873 david.dillard 1.32 p++;
|
874 mike 1.13
|
875 david.dillard 1.32 // Expect a comment or CDATA:
|
876 mike 1.13
|
877 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
878 {
879 p += 2;
880 entry.type = XmlEntry::COMMENT;
881 entry.text = p;
882 _getComment(p);
883 return;
884 }
885 else if (memcmp(p, "[CDATA[", 7) == 0)
886 {
887 p += 7;
888 entry.type = XmlEntry::CDATA;
889 entry.text = p;
890 _getCData(p);
891 return;
892 }
893 else if (memcmp(p, "DOCTYPE", 7) == 0)
894 {
895 entry.type = XmlEntry::DOCTYPE;
896 entry.text = _EMPTY_STRING;
897 _getDocType(p);
898 david.dillard 1.32 return;
899 }
900 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
901 mike 1.13 }
902 else if (*p == '/')
903 {
|
904 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
905 entry.text = ++p;
|
906 mike 1.13
|
907 david.dillard 1.32 if (!_getElementName(p))
908 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
909 mike 1.13
|
910 david.dillard 1.32 return;
|
911 mike 1.13 }
|
912 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
913 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
914 (*p == '_')))
|
915 mike 1.13 {
|
916 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
917 entry.text = p;
|
918 mike 1.13
|
919 david.dillard 1.32 Boolean openCloseElement = false;
|
920 mike 1.13
|
921 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
922 {
923 if (openCloseElement)
924 entry.type = XmlEntry::EMPTY_TAG;
925 return;
926 }
|
927 mike 1.13 }
928 else
|
929 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
930 mike 1.13
931 //--------------------------------------------------------------------------
932 // Grab all the attributes:
933 //--------------------------------------------------------------------------
934
935 for (;;)
936 {
|
937 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
938 {
939 if (p[0] == '?' && p[1] == '>')
940 {
941 p += 2;
942 return;
943 }
944 }
945 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
946 {
947 entry.type = XmlEntry::EMPTY_TAG;
948 p += 2;
949 return;
950 }
951 else if (*p == '>')
952 {
953 p++;
954 return;
955 }
956
957 XmlAttribute attr;
958 david.dillard 1.32 attr.name = p;
959 _getAttributeNameAndEqual(p);
960
961 if (*p != '"' && *p != '\'')
962 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
963
964 attr.value = p + 1;
965 _getAttributeValue(p);
966
967 if (entry.type == XmlEntry::XML_DECLARATION)
968 {
969 // The next thing must a space or a "?>":
|
970 mike 1.13
|
971 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
972 {
973 throw XmlException(
974 XmlException::BAD_ATTRIBUTE_VALUE, _line);
975 }
976 }
977 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
978 {
979 // The next thing must be a space or a '>':
|
980 mike 1.13
|
981 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
982 }
|
983 mike 1.13
|
984 david.dillard 1.32 _skipWhitespace(p);
985
986 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
987 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
988
989 _substituteReferences((char*)attr.value);
990 entry.attributes[entry.attributeCount++] = attr;
|
991 mike 1.13 }
992 }
993
994 static const char* _typeStrings[] =
995 {
|
996 david.dillard 1.32 "XML_DECLARATION",
997 "START_TAG",
998 "EMPTY_TAG",
999 "END_TAG",
|
1000 mike 1.13 "COMMENT",
1001 "CDATA",
1002 "DOCTYPE",
|
1003 david.dillard 1.32 "CONTENT"
|
1004 mike 1.13 };
1005
1006 void XmlEntry::print() const
1007 {
1008 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1009
1010 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1011
1012 if (needQuotes)
|
1013 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
1014
|
1015 mike 1.13 _printValue(text);
1016
1017 if (needQuotes)
|
1018 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
1019 mike 1.13
1020 PEGASUS_STD(cout) << '\n';
1021
1022 for (Uint32 i = 0; i < attributeCount; i++)
1023 {
|
1024 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1025 _printValue(attributes[i].value);
1026 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
1027 mike 1.13 }
1028 }
1029
1030 const XmlAttribute* XmlEntry::findAttribute(
1031 const char* name) const
1032 {
1033 for (Uint32 i = 0; i < attributeCount; i++)
1034 {
|
1035 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
1036 return &attributes[i];
|
1037 mike 1.13 }
1038
1039 return 0;
1040 }
1041
1042 // Find first non-whitespace character (set first) and last non-whitespace
1043 // character (set last one past this). For example, consider this string:
1044 //
|
1045 david.dillard 1.32 // " 87 "
|
1046 mike 1.13 //
1047 // The first pointer would point to '8' and the last pointer woudl point one
1048 // beyond '7'.
1049
1050 static void _findEnds(
|
1051 david.dillard 1.32 const char* str,
1052 const char*& first,
|
1053 mike 1.13 const char*& last)
1054 {
1055 first = str;
1056
|
1057 chuck 1.26 while (_isspace(*first))
|
1058 david.dillard 1.32 first++;
|
1059 mike 1.13
1060 if (!*first)
1061 {
|
1062 david.dillard 1.32 last = first;
1063 return;
|
1064 mike 1.13 }
1065
1066 last = first + strlen(first);
1067
|
1068 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1069 david.dillard 1.32 last--;
|
1070 mike 1.13 }
1071
1072 Boolean XmlEntry::getAttributeValue(
|
1073 david.dillard 1.32 const char* name,
|
1074 mike 1.13 Uint32& value) const
1075 {
1076 const XmlAttribute* attr = findAttribute(name);
1077
1078 if (!attr)
|
1079 david.dillard 1.32 return false;
|
1080 mike 1.13
1081 const char* first;
1082 const char* last;
1083 _findEnds(attr->value, first, last);
1084
1085 char* end = 0;
1086 long tmp = strtol(first, &end, 10);
1087
1088 if (!end || end != last)
|
1089 david.dillard 1.32 return false;
|
1090 mike 1.13
1091 value = Uint32(tmp);
1092 return true;
1093 }
1094
1095 Boolean XmlEntry::getAttributeValue(
|
1096 david.dillard 1.32 const char* name,
|
1097 mike 1.13 Real32& value) const
1098 {
1099 const XmlAttribute* attr = findAttribute(name);
1100
1101 if (!attr)
|
1102 david.dillard 1.32 return false;
|
1103 mike 1.13
1104 const char* first;
1105 const char* last;
1106 _findEnds(attr->value, first, last);
1107
1108 char* end = 0;
1109 double tmp = strtod(first, &end);
1110
1111 if (!end || end != last)
|
1112 david.dillard 1.32 return false;
|
1113 mike 1.13
|
1114 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1115 mike 1.13 return true;
1116 }
1117
1118 Boolean XmlEntry::getAttributeValue(
|
1119 david.dillard 1.32 const char* name,
|
1120 mike 1.13 const char*& value) const
1121 {
1122 const XmlAttribute* attr = findAttribute(name);
1123
1124 if (!attr)
|
1125 david.dillard 1.32 return false;
|
1126 mike 1.13
1127 value = attr->value;
1128 return true;
1129 }
1130
1131 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1132 {
1133 const char* tmp;
1134
1135 if (!getAttributeValue(name, tmp))
|
1136 david.dillard 1.32 return false;
|
1137 mike 1.13
|
1138 chuck 1.28 value = String(tmp);
|
1139 mike 1.13 return true;
1140 }
1141
|
1142 david.dillard 1.30 void XmlAppendCString(Array<char>& out, const char* str)
|
1143 mike 1.13 {
|
1144 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1145 mike 1.13 }
1146
1147 PEGASUS_NAMESPACE_END
|