1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 // Author: Mike Brasher (mbrasher@bmc.com)
33 //
|
34 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
35 // (david.dillard@veritas.com)
|
36 mike 1.13 //
37 //%/////////////////////////////////////////////////////////////////////////////
38
39 ////////////////////////////////////////////////////////////////////////////////
40 //
41 // XmlParser
42 //
|
43 david.dillard 1.32 // This file contains a simple non-validating XML parser. Here are
44 // several rules for well-formed XML:
|
45 mike 1.13 //
|
46 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
47 mike 1.13 //
|
48 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
49 mike 1.13 //
|
50 david.dillard 1.32 // 2. Comments have the form:
|
51 mike 1.13 //
|
52 david.dillard 1.32 // <!-- blah blah blah -->
|
53 mike 1.13 //
|
54 david.dillard 1.32 // 3. The following entity references are supported:
|
55 mike 1.13 //
|
56 david.dillard 1.32 // &amp; - ampersand
57 // &lt; - less-than
58 // &gt; - greater-than
59 // &quot; - full quote
60 // &apos; - apostrophe
|
61 mike 1.13 //
|
62 kumpf 1.18 // as well as character (numeric) references:
|
63 mike 1.35 //
|
64 kumpf 1.18 // &#49; - decimal reference for character '1'
65 // &#x31; - hexadecimal reference for character '1'
66 //
|
67 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
68 mike 1.13 //
|
69 david.dillard 1.32 // [A-Za-z_][A-Za-z_0-9-.:]*
|
70 mike 1.13 //
|
71 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
72 mike 1.13 //
|
73 david.dillard 1.32 // <![CDATA[
74 // ...
75 // ]]>
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
78 mike 1.13 //
|
79 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
80 // XmlAttribute values must be delimited.
|
81 mike 1.13 //
|
82 david.dillard 1.32 // 8. <!DOCTYPE...>
|
83 mike 1.13 //
84 // TODO:
85 //
|
86 mike 1.35 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
87 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
88 mike 1.13 // rules rather than references to files).
89 //
|
90 david.dillard 1.32 // Remove newlines from string literals:
|
91 mike 1.13 //
92 // Example: <xyz x="hello
|
93 david.dillard 1.32 // world">
|
94 mike 1.13 //
95 ////////////////////////////////////////////////////////////////////////////////
96
|
#include <Pegasus/Common/Config.h>
#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "XmlParser.h"
#include "Logger.h"
#include "ExceptionRep.h"
#include "CharSet.h"
|
106 mike 1.13
107 PEGASUS_NAMESPACE_BEGIN
108
109 #define PEGASUS_ARRAY_T XmlEntry
110 # include "ArrayImpl.h"
111 #undef PEGASUS_ARRAY_T
112
113 ////////////////////////////////////////////////////////////////////////////////
114 //
115 // Static helper functions
116 //
117 ////////////////////////////////////////////////////////////////////////////////
118
119 static void _printValue(const char* p)
120 {
121 for (; *p; p++)
122 {
|
123 david.dillard 1.32 if (*p == '\n')
124 PEGASUS_STD(cout) << "\\n";
125 else if (*p == '\r')
126 PEGASUS_STD(cout) << "\\r";
127 else if (*p == '\t')
128 PEGASUS_STD(cout) << "\\t";
129 else
130 PEGASUS_STD(cout) << *p;
|
131 mike 1.13 }
132 }
133
134 struct EntityReference
135 {
136 const char* match;
137 Uint32 length;
138 char replacement;
139 };
140
|
141 kumpf 1.18 // ATTN: Add support for more entity references
|
142 mike 1.13 static EntityReference _references[] =
143 {
144 { "&", 5, '&' },
145 { "<", 4, '<' },
146 { ">", 4, '>' },
147 { """, 6, '"' },
148 { "'", 6, '\'' }
149 };
150
|
151 chuck 1.26
152 // Implements a check for a whitespace character, without calling
153 // isspace( ). The isspace( ) function is locale-sensitive,
154 // and incorrectly flags some chars above 0x7f as whitespace. This
155 // causes the XmlParser to incorrectly parse UTF-8 data.
156 //
157 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
158 // defines white space as:
|
159 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
160 mike 1.34 static inline int _isspace(char c)
|
161 chuck 1.26 {
|
162 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
163 chuck 1.26 }
164
|
165 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
166
167 ////////////////////////////////////////////////////////////////////////////////
168 //
169 // XmlException
170 //
171 ////////////////////////////////////////////////////////////////////////////////
172
// Default (English) message text for each XmlException code. The table is
// indexed with (code - 1) by the message-forming helpers in this file and
// runs parallel to _xmlKeys, so the order of the entries must not change.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
193
|
// Message-catalog key for each XmlException code. Indexed with (code - 1)
// and kept in the same order as _xmlMessages, which supplies the matching
// default text.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",               //  1
    "Common.XmlParser.BAD_END_TAG",                 //  2
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",          //  3
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",         //  4
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",         //  5
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",      //  6
    "Common.XmlParser.UNTERMINATED_COMMENT",        //  7
    "Common.XmlParser.UNTERMINATED_CDATA",          //  8
    "Common.XmlParser.UNTERMINATED_DOCTYPE",        //  9
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",         // 10
    "Common.XmlParser.MALFORMED_REFERENCE",         // 11
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",   // 12
    "Common.XmlParser.START_END_MISMATCH",          // 13
    "Common.XmlParser.UNCLOSED_TAGS",               // 14
    "Common.XmlParser.MULTIPLE_ROOTS",              // 15
    "Common.XmlParser.VALIDATION_ERROR",            // 16
    "Common.XmlParser.SEMANTIC_ERROR"               // 17
};
214
|
215 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
216 chuck 1.19 /*
|
217 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
218 {
219 String result = _xmlMessages[Uint32(code) - 1];
220
221 char buffer[32];
222 sprintf(buffer, "%d", line);
223 result.append(": on line ");
224 result.append(buffer);
225
226 if (message.size())
227 {
|
228 david.dillard 1.32 result.append(": ");
229 result.append(message);
|
230 mike 1.13 }
231
232 return result;
233 }
|
234 chuck 1.19 */
235
236 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
237 {
238 String dftMsg = _xmlMessages[Uint32(code) - 1];
239 String key = _xmlKeys[Uint32(code) - 1];
|
240 david.dillard 1.32 String msg = message;
|
241 chuck 1.19
242 dftMsg.append(": on line $0");
243 if (message.size())
244 {
|
245 david.dillard 1.32 msg = ": " + msg;
246 dftMsg.append("$1");
247 }
|
248 chuck 1.19
249 return MessageLoaderParms(key, dftMsg, line ,msg);
250 }
251
252 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
253 {
254 String dftMsg = _xmlMessages[Uint32(code) - 1];
255 String key = _xmlKeys[Uint32(code) - 1];
256
257 dftMsg.append(": on line $0");
|
258 david.dillard 1.32
|
259 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
260 }
261
|
262 mike 1.13
263 XmlException::XmlException(
|
264 david.dillard 1.32 XmlException::Code code,
|
265 mike 1.13 Uint32 lineNumber,
|
266 david.dillard 1.32 const String& message)
|
267 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
268 {
269
270 }
271
|
272 chuck 1.19
273 XmlException::XmlException(
|
274 david.dillard 1.32 XmlException::Code code,
|
275 chuck 1.19 Uint32 lineNumber,
|
276 david.dillard 1.32 MessageLoaderParms& msgParms)
|
277 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
278 {
|
279 david.dillard 1.32 if (msgParms.default_msg.size())
|
280 humberto 1.21 {
|
281 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
282 }
283 _rep->message.append(MessageLoader::getMessage(msgParms));
|
284 chuck 1.19 }
285
286
|
287 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
288 //
289 // XmlValidationError
290 //
291 ////////////////////////////////////////////////////////////////////////////////
292
293 XmlValidationError::XmlValidationError(
294 Uint32 lineNumber,
295 const String& message)
296 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
297 {
298
299 }
300
|
301 chuck 1.19
302 XmlValidationError::XmlValidationError(
303 Uint32 lineNumber,
304 MessageLoaderParms& msgParms)
305 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
306 {
307
308 }
309
310
|
311 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
312 //
313 // XmlSemanticError
314 //
315 ////////////////////////////////////////////////////////////////////////////////
316
317 XmlSemanticError::XmlSemanticError(
318 Uint32 lineNumber,
319 const String& message)
320 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
321 {
322
323 }
|
324 chuck 1.19
325
326 XmlSemanticError::XmlSemanticError(
327 Uint32 lineNumber,
328 MessageLoaderParms& msgParms)
329 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
330 {
331
332 }
333
|
334 mike 1.13
335 ////////////////////////////////////////////////////////////////////////////////
336 //
337 // XmlParser
338 //
339 ////////////////////////////////////////////////////////////////////////////////
340
|
341 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
|
342 mike 1.13 _restoreChar('\0'), _foundRoot(false)
343 {
344
345 }
346
|
347 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
348 {
349 while (*p && _isspace(*p))
350 {
351 if (*p == '\n')
352 line++;
353
354 p++;
355 }
356 }
357
|
// Recognizes one of the five predefined entity names at p. On entry p
// points just past the '&' of the reference.
//
// On a match ("gt;", "lt;", "apos;", "quot;" or "amp;") p is advanced past
// the ';' and the character the entity stands for is returned. Otherwise p
// is left unchanged and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct Entity
    {
        const char* name;   // entity name including the trailing ';'
        size_t length;      // number of characters in name
        int value;          // character the entity stands for
    };

    static const Entity entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp() stops at a NUL in p, so a short buffer is never
        // read past its terminator.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
394
// Parses the numeric part of a character reference.
//
// p    On entry points at the first digit, i.e. just past "&#" for a
//      decimal reference or just past "&#x" for a hexadecimal one. On
//      success it is advanced past the terminating ';'. On failure it is
//      left unchanged.
// hex  true to parse hexadecimal digits, false to parse decimal digits.
//
// Returns the character code (0..255), or -1 if the reference is
// malformed: no digits, anything other than digits before the ';', more
// than 4 hexadecimal / 5 decimal digits, or a value above 255.
//
// NOTE(review): a value of 0 is still accepted and would be stored as an
// embedded NUL by the caller -- confirm whether that is intended.
static inline int _getCharRef(char*& p, bool hex)
{
    // Measure the digit run explicitly instead of trusting strtoul():
    // strtoul() also skips leading whitespace, accepts a '+' or '-' sign
    // and, for base 16, an optional "0x" prefix. None of those are legal
    // inside a character reference.
    char* digitsEnd = p;

    if (hex)
    {
        while (isxdigit(static_cast<unsigned char>(*digitsEnd)))
            digitsEnd++;
    }
    else
    {
        while (isdigit(static_cast<unsigned char>(*digitsEnd)))
            digitsEnd++;
    }

    if ((digitsEnd == p) || (*digitsEnd != ';'))
    {
        return -1;
    }

    if ((hex && (digitsEnd - p > 4)) || (!hex && (digitsEnd - p > 5)))
    {
        return -1;
    }

    // At most five digits remain, so the conversion cannot overflow.
    const unsigned long ch = strtoul(p, 0, hex ? 16 : 10);

    if (ch > 255)
    {
        return -1;
    }

    p = digitsEnd + 1;

    return static_cast<int>(ch);
}
423
424 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
425 {
426 // Skip over leading whitespace:
427
428 _skipWhitespace(line, p);
429 start = p;
430
431 // Process one character at a time:
432
433 char* q = p;
434
435 while (*p && (*p != end_char))
436 {
437 if (_isspace(*p))
438 {
439 // Compress sequences of whitespace characters to a single space
440 // character. Update line number when newlines encountered.
441
442 kumpf 1.37 if (*p++ == '\n')
443 {
444 line++;
445 }
446
447 *q++ = ' ';
448
449 _skipWhitespace(line, p);
450 }
451 else if (*p == '&')
452 {
453 // Process entity characters and entity references:
454
455 p++;
456 int ch;
457
458 if (*p == '#')
459 {
460 *p++;
461
462 if (*p == 'x')
463 kumpf 1.37 {
464 p++;
465 ch = _getCharRef(p, true);
466 }
467 else
468 {
469 ch = _getCharRef(p, false);
470 }
471 }
472 else
473 {
474 ch = _getEntityRef(p);
475 }
476
477 if (ch == -1)
478 {
479 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
480 }
481
482 *q++ = ch;
483 }
484 kumpf 1.37 else
485 {
486 *q++ = *p++;
487 }
488 }
489
490 // We encountered a the end_char or a zero-terminator.
491
492 *q = *p;
493
494 // Remove single trailing whitespace (consecutive whitespaces already
495 // compressed above). Since p >= q, we can tell if we need to strip a
496 // trailing space from q by looking at the end of p. We must not look at
497 // the last character of p, though, if p is an empty string.
498
499 if ((p != start) && _isspace(p[-1]))
500 {
501 q--;
502 }
503
504 // If q got behind p, it is safe and necessary to null-terminate q
505 kumpf 1.37
506 if (q != p)
507 {
508 *q = '\0';
509 }
510 }
511
|
512 mike 1.13 Boolean XmlParser::next(XmlEntry& entry)
513 {
514 if (!_putBackStack.isEmpty())
515 {
|
516 david.dillard 1.32 entry = _putBackStack.top();
517 _putBackStack.pop();
518 return true;
|
519 mike 1.13 }
520
521 // If a character was overwritten with a null-terminator the last
522 // time this routine was called, then put back that character. Before
523 // exiting of course, restore the null-terminator.
524
525 char* nullTerminator = 0;
526
527 if (_restoreChar && !*_current)
528 {
|
529 david.dillard 1.32 nullTerminator = _current;
530 *_current = _restoreChar;
531 _restoreChar = '\0';
|
532 mike 1.13 }
533
534 // Skip over any whitespace:
535
|
536 mike 1.34 _skipWhitespace(_line, _current);
|
537 mike 1.13
538 if (!*_current)
539 {
|
540 david.dillard 1.32 if (nullTerminator)
541 *nullTerminator = '\0';
|
542 mike 1.13
|
543 david.dillard 1.32 if (!_stack.isEmpty())
544 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
545 mike 1.13
|
546 david.dillard 1.32 return false;
|
547 mike 1.13 }
548
549 // Either a "<...>" or content begins next:
550
551 if (*_current == '<')
552 {
|
553 david.dillard 1.32 _current++;
554 _getElement(_current, entry);
|
555 mike 1.13
|
556 david.dillard 1.32 if (nullTerminator)
557 *nullTerminator = '\0';
|
558 mike 1.13
|
559 david.dillard 1.32 if (entry.type == XmlEntry::START_TAG)
560 {
561 if (_stack.isEmpty() && _foundRoot)
562 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
563
564 _foundRoot = true;
565 _stack.push((char*)entry.text);
566 }
567 else if (entry.type == XmlEntry::END_TAG)
568 {
569 if (_stack.isEmpty())
570 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
571 mike 1.13
|
572 david.dillard 1.32 if (strcmp(_stack.top(), entry.text) != 0)
573 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
574 mike 1.13
|
575 david.dillard 1.32 _stack.pop();
576 }
|
577 mike 1.13
|
578 david.dillard 1.32 return true;
|
579 mike 1.13 }
580 else
581 {
|
582 kumpf 1.37 // Normalize the content:
583
584 char* start;
585 _normalize(_line, _current, '<', start);
586
587 // Get the content:
588
|
589 david.dillard 1.32 entry.type = XmlEntry::CONTENT;
|
590 kumpf 1.37 entry.text = start;
591
592 // Overwrite '<' with a null character (temporarily).
593
|
594 david.dillard 1.32 _restoreChar = *_current;
595 *_current = '\0';
|
596 mike 1.13
|
597 david.dillard 1.32 if (nullTerminator)
598 *nullTerminator = '\0';
|
599 mike 1.13
|
600 david.dillard 1.32 return true;
|
601 mike 1.13 }
602 }
603
604 void XmlParser::putBack(XmlEntry& entry)
605 {
606 _putBackStack.push(entry);
607 }
608
609 XmlParser::~XmlParser()
610 {
611 // Nothing to do!
612 }
613
|
// Lookup table, indexed by an unsigned 8-bit character code, that is 1 for
// the characters allowed after the first character of an element or
// attribute name and 0 for everything else:
//
//     A-Z  a-z  0-9  '_'  '-'  ':'  '.'
//
// The size is stated explicitly so that every Uint8 index is in bounds;
// entries without an initializer are zero.
static const unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0, // 0x20 - 0x2F: '-' '.'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x30 - 0x3F: '0'-'9' ':'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x40 - 0x4F: 'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 0x50 - 0x5F: 'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x60 - 0x6F: 'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 0x70 - 0x7F: 'p'-'z'
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xB0 - 0xBF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xC0 - 0xCF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xD0 - 0xDF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xE0 - 0xEF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0  // 0xF0 - 0xFF
};
625
|
626 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
627 {
|
628 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
629 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
630 mike 1.35
|
631 kumpf 1.24 p++;
|
632 mike 1.13
|
633 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
634 david.dillard 1.32 p++;
|
635 mike 1.13
636 // The next character must be a space:
637
|
638 chuck 1.26 if (_isspace(*p))
|
639 mike 1.13 {
|
640 david.dillard 1.32 *p++ = '\0';
|
641 mike 1.34 _skipWhitespace(_line, p);
|
642 mike 1.13 }
643
644 if (*p == '>')
645 {
|
646 david.dillard 1.32 *p++ = '\0';
647 return true;
|
648 mike 1.13 }
649
650 return false;
651 }
652
653 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
654 {
655 openCloseElement = false;
656
|
657 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
658 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
659 mike 1.35
|
660 kumpf 1.24 p++;
|
661 mike 1.13
|
662 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
663 david.dillard 1.32 p++;
|
664 mike 1.13
665 // The next character must be a space:
666
|
667 chuck 1.26 if (_isspace(*p))
|
668 mike 1.13 {
|
669 david.dillard 1.32 *p++ = '\0';
|
670 mike 1.34 _skipWhitespace(_line, p);
|
671 mike 1.13 }
672
673 if (*p == '>')
674 {
|
675 david.dillard 1.32 *p++ = '\0';
676 return true;
|
677 mike 1.13 }
678
679 if (p[0] == '/' && p[1] == '>')
680 {
|
681 david.dillard 1.32 openCloseElement = true;
682 *p = '\0';
683 p += 2;
684 return true;
|
685 mike 1.13 }
686
687 return false;
688 }
689
690 void XmlParser::_getAttributeNameAndEqual(char*& p)
691 {
|
692 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
693 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
694 mike 1.35
|
695 kumpf 1.24 p++;
|
696 mike 1.13
|
697 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
698 david.dillard 1.32 p++;
|
699 mike 1.13
700 char* term = p;
701
|
702 mike 1.34 _skipWhitespace(_line, p);
|
703 mike 1.13
704 if (*p != '=')
|
705 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
706 mike 1.13
707 p++;
708
|
709 mike 1.34 _skipWhitespace(_line, p);
|
710 mike 1.13
711 *term = '\0';
712 }
713
714 void XmlParser::_getComment(char*& p)
715 {
716 // Now p points to first non-whitespace character beyond "<--" sequence:
717
718 for (; *p; p++)
719 {
|
720 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
721 {
722 if (p[2] != '>')
723 {
724 throw XmlException(
725 XmlException::MINUS_MINUS_IN_COMMENT, _line);
726 }
727
728 // Find end of comment (excluding whitespace):
729
730 *p = '\0';
731 p += 3;
732 return;
733 }
|
734 mike 1.13 }
735
736 // If it got this far, then the comment is unterminated:
737
738 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
739 }
740
741 void XmlParser::_getCData(char*& p)
742 {
743 // At this point p points one past "<![CDATA[" sequence:
744
745 for (; *p; p++)
746 {
|
747 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
748 {
749 *p = '\0';
750 p += 3;
751 return;
752 }
753 else if (*p == '\n')
754 _line++;
|
755 mike 1.13 }
756
757 // If it got this far, then the comment is unterminated:
758
759 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
760 }
761
762 void XmlParser::_getDocType(char*& p)
763 {
764 // Just ignore the DOCTYPE command for now:
765
766 for (; *p && *p != '>'; p++)
767 {
|
768 david.dillard 1.32 if (*p == '\n')
769 _line++;
|
770 mike 1.13 }
771
772 if (*p != '>')
|
773 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
774 mike 1.13
775 p++;
776 }
777
778 void XmlParser::_getElement(char*& p, XmlEntry& entry)
779 {
780 entry.attributeCount = 0;
781
782 //--------------------------------------------------------------------------
783 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
784 //--------------------------------------------------------------------------
785
786 if (*p == '?')
787 {
|
788 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
789 entry.text = ++p;
|
790 mike 1.13
|
791 david.dillard 1.32 Boolean openCloseElement = false;
|
792 mike 1.13
|
793 david.dillard 1.32 if (_getElementName(p))
794 return;
|
795 mike 1.13 }
796 else if (*p == '!')
797 {
|
798 david.dillard 1.32 p++;
|
799 mike 1.13
|
800 david.dillard 1.32 // Expect a comment or CDATA:
|
801 mike 1.13
|
802 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
803 {
804 p += 2;
805 entry.type = XmlEntry::COMMENT;
806 entry.text = p;
807 _getComment(p);
808 return;
809 }
810 else if (memcmp(p, "[CDATA[", 7) == 0)
811 {
812 p += 7;
813 entry.type = XmlEntry::CDATA;
814 entry.text = p;
815 _getCData(p);
816 return;
817 }
818 else if (memcmp(p, "DOCTYPE", 7) == 0)
819 {
820 entry.type = XmlEntry::DOCTYPE;
|
821 kumpf 1.37 entry.text = "";
|
822 david.dillard 1.32 _getDocType(p);
823 return;
824 }
825 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
826 mike 1.13 }
827 else if (*p == '/')
828 {
|
829 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
830 entry.text = ++p;
|
831 mike 1.13
|
832 david.dillard 1.32 if (!_getElementName(p))
833 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
834 mike 1.13
|
835 david.dillard 1.32 return;
|
836 mike 1.13 }
|
837 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
838 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
839 (*p == '_')))
|
840 mike 1.13 {
|
841 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
842 entry.text = p;
|
843 mike 1.13
|
844 david.dillard 1.32 Boolean openCloseElement = false;
|
845 mike 1.13
|
846 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
847 {
848 if (openCloseElement)
849 entry.type = XmlEntry::EMPTY_TAG;
850 return;
851 }
|
852 mike 1.13 }
853 else
|
854 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
855 mike 1.13
856 //--------------------------------------------------------------------------
857 // Grab all the attributes:
858 //--------------------------------------------------------------------------
859
860 for (;;)
861 {
|
862 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
863 {
864 if (p[0] == '?' && p[1] == '>')
865 {
866 p += 2;
867 return;
868 }
869 }
870 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
871 {
872 entry.type = XmlEntry::EMPTY_TAG;
873 p += 2;
874 return;
875 }
876 else if (*p == '>')
877 {
878 p++;
879 return;
880 }
881
882 XmlAttribute attr;
883 david.dillard 1.32 attr.name = p;
884 _getAttributeNameAndEqual(p);
885
|
886 kumpf 1.37 // Get the attribute value (e.g., "some value")
887 {
888 if ((*p != '"') && (*p != '\''))
889 {
890 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
891 }
892
893 char quote = *p++;
894
895 char* start;
896 _normalize(_line, p, quote, start);
897 attr.value = start;
898
899 if (*p != quote)
900 {
901 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
902 }
903
904 // Overwrite the closing quote with a null-terminator:
|
905 david.dillard 1.32
|
906 kumpf 1.37 *p++ = '\0';
907 }
|
908 david.dillard 1.32
909 if (entry.type == XmlEntry::XML_DECLARATION)
910 {
911 // The next thing must a space or a "?>":
|
912 mike 1.13
|
913 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
914 {
915 throw XmlException(
916 XmlException::BAD_ATTRIBUTE_VALUE, _line);
917 }
918 }
919 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
920 {
921 // The next thing must be a space or a '>':
|
922 mike 1.13
|
923 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
924 }
|
925 mike 1.13
|
926 mike 1.34 _skipWhitespace(_line, p);
|
927 david.dillard 1.32
928 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
929 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
930
931 entry.attributes[entry.attributeCount++] = attr;
|
932 mike 1.13 }
933 }
934
// Printable name for each entry type; XmlEntry::print() indexes this table
// with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",  // 0
    "START_TAG",        // 1
    "EMPTY_TAG",        // 2
    "END_TAG",          // 3
    "COMMENT",          // 4
    "CDATA",            // 5
    "DOCTYPE",          // 6
    "CONTENT"           // 7
};
946
947 void XmlEntry::print() const
948 {
949 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
950
951 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
952
953 if (needQuotes)
|
954 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
955
|
956 mike 1.13 _printValue(text);
957
958 if (needQuotes)
|
959 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
960 mike 1.13
961 PEGASUS_STD(cout) << '\n';
962
963 for (Uint32 i = 0; i < attributeCount; i++)
964 {
|
965 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
966 _printValue(attributes[i].value);
967 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
968 mike 1.13 }
969 }
970
971 const XmlAttribute* XmlEntry::findAttribute(
972 const char* name) const
973 {
974 for (Uint32 i = 0; i < attributeCount; i++)
975 {
|
976 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
977 return &attributes[i];
|
978 mike 1.13 }
979
980 return 0;
981 }
982
983 // Find first non-whitespace character (set first) and last non-whitespace
984 // character (set last one past this). For example, consider this string:
985 //
|
986 david.dillard 1.32 // " 87 "
|
987 mike 1.13 //
988 // The first pointer would point to '8' and the last pointer woudl point one
989 // beyond '7'.
990
991 static void _findEnds(
|
992 david.dillard 1.32 const char* str,
993 const char*& first,
|
994 mike 1.13 const char*& last)
995 {
996 first = str;
997
|
998 chuck 1.26 while (_isspace(*first))
|
999 david.dillard 1.32 first++;
|
1000 mike 1.13
1001 if (!*first)
1002 {
|
1003 david.dillard 1.32 last = first;
1004 return;
|
1005 mike 1.13 }
1006
1007 last = first + strlen(first);
1008
|
1009 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1010 david.dillard 1.32 last--;
|
1011 mike 1.13 }
1012
1013 Boolean XmlEntry::getAttributeValue(
|
1014 david.dillard 1.32 const char* name,
|
1015 mike 1.13 Uint32& value) const
1016 {
1017 const XmlAttribute* attr = findAttribute(name);
1018
1019 if (!attr)
|
1020 david.dillard 1.32 return false;
|
1021 mike 1.13
1022 const char* first;
1023 const char* last;
1024 _findEnds(attr->value, first, last);
1025
1026 char* end = 0;
1027 long tmp = strtol(first, &end, 10);
1028
1029 if (!end || end != last)
|
1030 david.dillard 1.32 return false;
|
1031 mike 1.13
1032 value = Uint32(tmp);
1033 return true;
1034 }
1035
1036 Boolean XmlEntry::getAttributeValue(
|
1037 david.dillard 1.32 const char* name,
|
1038 mike 1.13 Real32& value) const
1039 {
1040 const XmlAttribute* attr = findAttribute(name);
1041
1042 if (!attr)
|
1043 david.dillard 1.32 return false;
|
1044 mike 1.13
1045 const char* first;
1046 const char* last;
1047 _findEnds(attr->value, first, last);
1048
1049 char* end = 0;
1050 double tmp = strtod(first, &end);
1051
1052 if (!end || end != last)
|
1053 david.dillard 1.32 return false;
|
1054 mike 1.13
|
1055 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1056 mike 1.13 return true;
1057 }
1058
1059 Boolean XmlEntry::getAttributeValue(
|
1060 david.dillard 1.32 const char* name,
|
1061 mike 1.13 const char*& value) const
1062 {
1063 const XmlAttribute* attr = findAttribute(name);
1064
1065 if (!attr)
|
1066 david.dillard 1.32 return false;
|
1067 mike 1.13
1068 value = attr->value;
1069 return true;
1070 }
1071
1072 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1073 {
1074 const char* tmp;
1075
1076 if (!getAttributeValue(name, tmp))
|
1077 david.dillard 1.32 return false;
|
1078 mike 1.13
|
1079 chuck 1.28 value = String(tmp);
|
1080 mike 1.13 return true;
1081 }
1082
|
1083 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1084 mike 1.13 {
|
1085 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1086 mike 1.13 }
1087
1088 PEGASUS_NAMESPACE_END
|