1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
|
// This file contains a simple non-validating XML parser. Here are
// several rules for well-formed XML:
|
40 mike 1.13 //
|
41 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
42 mike 1.13 //
|
43 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
44 mike 1.13 //
|
45 david.dillard 1.32 // 2. Comments have the form:
|
46 mike 1.13 //
|
47 david.dillard 1.32 // <!-- blah blah blah -->
|
48 mike 1.13 //
|
49 david.dillard 1.32 // 3. The following entity references are supported:
|
50 mike 1.13 //
|
//         &amp; - ampersand
//         &lt; - less-than
//         &gt; - greater-than
//         &quot; - full quote
//         &apos; - apostrophe
|
56 mike 1.13 //
|
57 kumpf 1.18 // as well as character (numeric) references:
|
58 mike 1.35 //
|
//         &#49; - decimal reference for character '1'
//         &#x31; - hexadecimal reference for character '1'
61 //
|
62 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
63 mike 1.13 //
|
//         [A-Za-z_][A-Za-z_0-9-.:]*
|
65 mike 1.13 //
|
66 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
67 mike 1.13 //
|
68 david.dillard 1.32 // <![CDATA[
69 // ...
70 // ]]>
|
71 mike 1.13 //
|
72 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
73 mike 1.13 //
|
74 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 8. <!DOCTYPE...>
|
78 mike 1.13 //
79 // TODO:
80 //
|
81 kumpf 1.40 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
|
82 mike 1.35 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
83 mike 1.13 // rules rather than references to files).
84 //
|
85 david.dillard 1.32 // Remove newlines from string literals:
|
86 mike 1.13 //
87 // Example: <xyz x="hello
|
88 david.dillard 1.32 // world">
|
89 mike 1.13 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
#include <Pegasus/Common/Config.h>
#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "XmlParser.h"
#include "Logger.h"
#include "ExceptionRep.h"
#include "CharSet.h"
|
101 mike 1.13
102 PEGASUS_NAMESPACE_BEGIN
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
|
114 david.dillard 1.32 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
|
122 mike 1.13 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
|
142 chuck 1.26
143 // Implements a check for a whitespace character, without calling
144 // isspace( ). The isspace( ) function is locale-sensitive,
145 // and incorrectly flags some chars above 0x7f as whitespace. This
146 // causes the XmlParser to incorrectly parse UTF-8 data.
147 //
148 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
149 // defines white space as:
|
150 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
151 mike 1.34 static inline int _isspace(char c)
|
152 chuck 1.26 {
|
153 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
154 chuck 1.26 }
155
|
156 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
157
158 ////////////////////////////////////////////////////////////////////////////////
159 //
160 // XmlException
161 //
162 ////////////////////////////////////////////////////////////////////////////////
163
// Default (English) message text for each XmlException code. The message
// builders in this file index this table and _xmlKeys with (code - 1), so
// the two tables must list their entries in the same order.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
184
|
// Message-bundle key for each XmlException code. The message builders in
// this file index this table and _xmlMessages with (code - 1), so entry N
// here is the key for default text N in _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
205
|
206 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
207 chuck 1.19 /*
|
208 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
209 {
210 String result = _xmlMessages[Uint32(code) - 1];
211
212 char buffer[32];
213 sprintf(buffer, "%d", line);
214 result.append(": on line ");
215 result.append(buffer);
216
217 if (message.size())
218 {
|
219 david.dillard 1.32 result.append(": ");
220 result.append(message);
|
221 mike 1.13 }
222
223 return result;
224 }
|
225 chuck 1.19 */
226
|
227 kumpf 1.40 static MessageLoaderParms _formMessage(
228 Uint32 code,
229 Uint32 line,
230 const String& message)
|
231 chuck 1.19 {
232 String dftMsg = _xmlMessages[Uint32(code) - 1];
233 String key = _xmlKeys[Uint32(code) - 1];
|
234 david.dillard 1.32 String msg = message;
|
235 chuck 1.19
236 dftMsg.append(": on line $0");
237 if (message.size())
238 {
|
239 david.dillard 1.32 msg = ": " + msg;
240 dftMsg.append("$1");
241 }
|
242 chuck 1.19
243 return MessageLoaderParms(key, dftMsg, line ,msg);
244 }
245
246 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
247 {
248 String dftMsg = _xmlMessages[Uint32(code) - 1];
249 String key = _xmlKeys[Uint32(code) - 1];
250
251 dftMsg.append(": on line $0");
|
252 david.dillard 1.32
|
253 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
254 }
255
|
256 mike 1.13
257 XmlException::XmlException(
|
258 david.dillard 1.32 XmlException::Code code,
|
259 mike 1.13 Uint32 lineNumber,
|
260 david.dillard 1.32 const String& message)
|
261 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
262 {
263
264 }
265
|
266 chuck 1.19
267 XmlException::XmlException(
|
268 david.dillard 1.32 XmlException::Code code,
|
269 chuck 1.19 Uint32 lineNumber,
|
270 david.dillard 1.32 MessageLoaderParms& msgParms)
|
271 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
272 {
|
273 david.dillard 1.32 if (msgParms.default_msg.size())
|
274 humberto 1.21 {
|
275 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
276 }
277 _rep->message.append(MessageLoader::getMessage(msgParms));
|
278 chuck 1.19 }
279
280
|
281 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
282 //
283 // XmlValidationError
284 //
285 ////////////////////////////////////////////////////////////////////////////////
286
287 XmlValidationError::XmlValidationError(
288 Uint32 lineNumber,
289 const String& message)
290 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
291 {
292 }
293
|
294 chuck 1.19
295 XmlValidationError::XmlValidationError(
296 Uint32 lineNumber,
297 MessageLoaderParms& msgParms)
298 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
299 {
300 }
301
302
|
303 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
304 //
305 // XmlSemanticError
306 //
307 ////////////////////////////////////////////////////////////////////////////////
308
309 XmlSemanticError::XmlSemanticError(
310 Uint32 lineNumber,
311 const String& message)
312 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
313 {
314 }
|
315 chuck 1.19
316
317 XmlSemanticError::XmlSemanticError(
318 Uint32 lineNumber,
319 MessageLoaderParms& msgParms)
320 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
321 {
322 }
323
|
324 mike 1.13
325 ////////////////////////////////////////////////////////////////////////////////
326 //
327 // XmlParser
328 //
329 ////////////////////////////////////////////////////////////////////////////////
330
|
331 kumpf 1.40 XmlParser::XmlParser(char* text)
332 : _line(1),
333 _text(text),
334 _current(text),
335 _restoreChar('\0'),
336 _foundRoot(false)
|
337 mike 1.13 {
338 }
339
|
340 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
341 {
342 while (*p && _isspace(*p))
343 {
344 if (*p == '\n')
345 line++;
346
347 p++;
348 }
349 }
350
|
// Recognizes one of the predefined entity names at p, where p points just
// past the '&' of a reference. On a match ("gt;", "lt;", "apos;", "quot;"
// or "amp;") p is advanced past the ';' and the corresponding character is
// returned. Otherwise p is left unchanged and -1 is returned.
static int _getEntityRef(char*& p)
{
    static const struct
    {
        const char* name;
        size_t length;
        char value;
    }
    entities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const size_t count = sizeof(entities) / sizeof(entities[0]);

    for (size_t i = 0; i < count; i++)
    {
        // strncmp stops at a null in either string, so a short input is
        // never read beyond its terminator.
        if (strncmp(p, entities[i].name, entities[i].length) == 0)
        {
            p += entities[i].length;
            return entities[i].value;
        }
    }

    return -1;
}
387
// Parses the numeric part of a character reference. p points at the first
// digit: just past "&#" for a decimal reference (hex == false) or just past
// "&#x" for a hexadecimal one (hex == true).
//
// A reference is accepted only if it consists of one or more digits of the
// selected base (at most 5 decimal or 4 hexadecimal digits), is terminated
// by ';', and has a value no greater than 255. On success p is advanced
// past the ';' and the character value is returned. On failure p is left
// unchanged and -1 is returned.
//
// The digits are scanned by hand rather than with strtoul() because
// strtoul() also accepts leading whitespace, a '+' or '-' sign and, in
// base 16, a "0x" prefix, none of which may appear in a reference.
static inline int _getCharRef(char*& p, bool hex)
{
    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long ch = 0;
    int digitCount = 0;
    char* q = p;

    for (;; q++)
    {
        const char c = *q;
        unsigned long digit;

        if ((c >= '0') && (c <= '9'))
        {
            digit = c - '0';
        }
        else if (hex && (c >= 'a') && (c <= 'f'))
        {
            digit = 10 + (c - 'a');
        }
        else if (hex && (c >= 'A') && (c <= 'F'))
        {
            digit = 10 + (c - 'A');
        }
        else
        {
            break;
        }

        // Bounding the digit count also keeps ch from overflowing.
        if (++digitCount > maxDigits)
        {
            return -1;
        }

        ch = ch * base + digit;
    }

    if ((digitCount == 0) || (*q != ';') || (ch > 255))
    {
        return -1;
    }

    p = q + 1;

    return int(ch);
}
416
417 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
418 {
419 // Skip over leading whitespace:
420
421 _skipWhitespace(line, p);
422 start = p;
423
424 // Process one character at a time:
425
426 char* q = p;
427
428 while (*p && (*p != end_char))
429 {
430 if (_isspace(*p))
431 {
432 // Compress sequences of whitespace characters to a single space
433 // character. Update line number when newlines encountered.
434
435 kumpf 1.37 if (*p++ == '\n')
436 {
437 line++;
438 }
439
440 *q++ = ' ';
441
442 _skipWhitespace(line, p);
443 }
444 else if (*p == '&')
445 {
446 // Process entity characters and entity references:
447
448 p++;
449 int ch;
450
451 if (*p == '#')
452 {
453 *p++;
454
455 if (*p == 'x')
456 kumpf 1.37 {
457 p++;
458 ch = _getCharRef(p, true);
459 }
460 else
461 {
462 ch = _getCharRef(p, false);
463 }
464 }
465 else
466 {
467 ch = _getEntityRef(p);
468 }
469
470 if (ch == -1)
471 {
472 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
473 }
474
475 *q++ = ch;
476 }
477 kumpf 1.37 else
478 {
479 *q++ = *p++;
480 }
481 }
482
|
483 kumpf 1.40 // We encountered a the end_char or a zero-terminator.
|
484 kumpf 1.37
485 *q = *p;
486
487 // Remove single trailing whitespace (consecutive whitespaces already
488 // compressed above). Since p >= q, we can tell if we need to strip a
489 // trailing space from q by looking at the end of p. We must not look at
490 // the last character of p, though, if p is an empty string.
491
492 if ((p != start) && _isspace(p[-1]))
493 {
494 q--;
495 }
496
497 // If q got behind p, it is safe and necessary to null-terminate q
498
499 if (q != p)
500 {
501 *q = '\0';
502 }
503 }
504
|
505 mike 1.13 Boolean XmlParser::next(XmlEntry& entry)
506 {
507 if (!_putBackStack.isEmpty())
508 {
|
509 david.dillard 1.32 entry = _putBackStack.top();
510 _putBackStack.pop();
511 return true;
|
512 mike 1.13 }
513
514 // If a character was overwritten with a null-terminator the last
515 // time this routine was called, then put back that character. Before
516 // exiting of course, restore the null-terminator.
517
518 char* nullTerminator = 0;
519
520 if (_restoreChar && !*_current)
521 {
|
522 david.dillard 1.32 nullTerminator = _current;
523 *_current = _restoreChar;
524 _restoreChar = '\0';
|
525 mike 1.13 }
526
527 // Skip over any whitespace:
528
|
529 mike 1.34 _skipWhitespace(_line, _current);
|
530 mike 1.13
531 if (!*_current)
532 {
|
533 david.dillard 1.32 if (nullTerminator)
534 *nullTerminator = '\0';
|
535 mike 1.13
|
536 david.dillard 1.32 if (!_stack.isEmpty())
537 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
538 mike 1.13
|
539 david.dillard 1.32 return false;
|
540 mike 1.13 }
541
542 // Either a "<...>" or content begins next:
543
544 if (*_current == '<')
545 {
|
546 david.dillard 1.32 _current++;
547 _getElement(_current, entry);
|
548 mike 1.13
|
549 david.dillard 1.32 if (nullTerminator)
550 *nullTerminator = '\0';
|
551 mike 1.13
|
552 david.dillard 1.32 if (entry.type == XmlEntry::START_TAG)
553 {
554 if (_stack.isEmpty() && _foundRoot)
555 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
556
557 _foundRoot = true;
558 _stack.push((char*)entry.text);
559 }
560 else if (entry.type == XmlEntry::END_TAG)
561 {
562 if (_stack.isEmpty())
563 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
564 mike 1.13
|
565 david.dillard 1.32 if (strcmp(_stack.top(), entry.text) != 0)
566 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
567 mike 1.13
|
568 david.dillard 1.32 _stack.pop();
569 }
|
570 mike 1.13
|
571 david.dillard 1.32 return true;
|
572 mike 1.13 }
573 else
574 {
|
575 kumpf 1.37 // Normalize the content:
576
577 char* start;
578 _normalize(_line, _current, '<', start);
579
580 // Get the content:
581
|
582 david.dillard 1.32 entry.type = XmlEntry::CONTENT;
|
583 kumpf 1.37 entry.text = start;
584
585 // Overwrite '<' with a null character (temporarily).
586
|
587 david.dillard 1.32 _restoreChar = *_current;
588 *_current = '\0';
|
589 mike 1.13
|
590 david.dillard 1.32 if (nullTerminator)
591 *nullTerminator = '\0';
|
592 mike 1.13
|
593 david.dillard 1.32 return true;
|
594 mike 1.13 }
595 }
596
597 void XmlParser::putBack(XmlEntry& entry)
598 {
599 _putBackStack.push(entry);
600 }
601
602 XmlParser::~XmlParser()
603 {
604 // Nothing to do!
605 }
606
|
// Lookup table, indexed by byte value, that is non-zero for the characters
// allowed after the first character of an element or attribute name:
//
//     A-Z a-z 0-9 _ - : .
//
// The table is declared with all 256 entries so that any Uint8 index is
// valid; entries not listed explicitly (128-255) are zero-initialized.
static const unsigned char _isInnerElementChar[256] =
{
    /*   0 -  15 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /*  16 -  31 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /*  32 -  47 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,  // '-' '.'
    /*  48 -  63 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,  // '0'-'9' ':'
    /*  64 -  79 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 'A'-'O'
    /*  80 -  95 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,  // 'P'-'Z' '_'
    /*  96 - 111 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  // 'a'-'o'
    /* 112 - 127 */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0   // 'p'-'z'
};
618
|
619 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
620 {
|
621 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
622 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
623 mike 1.35
|
624 kumpf 1.24 p++;
|
625 mike 1.13
|
626 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
627 david.dillard 1.32 p++;
|
628 mike 1.13
629 // The next character must be a space:
630
|
631 chuck 1.26 if (_isspace(*p))
|
632 mike 1.13 {
|
633 david.dillard 1.32 *p++ = '\0';
|
634 mike 1.34 _skipWhitespace(_line, p);
|
635 mike 1.13 }
636
637 if (*p == '>')
638 {
|
639 david.dillard 1.32 *p++ = '\0';
640 return true;
|
641 mike 1.13 }
642
643 return false;
644 }
645
646 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
647 {
648 openCloseElement = false;
649
|
650 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
651 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
652 mike 1.35
|
653 kumpf 1.24 p++;
|
654 mike 1.13
|
655 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
656 david.dillard 1.32 p++;
|
657 mike 1.13
658 // The next character must be a space:
659
|
660 chuck 1.26 if (_isspace(*p))
|
661 mike 1.13 {
|
662 david.dillard 1.32 *p++ = '\0';
|
663 mike 1.34 _skipWhitespace(_line, p);
|
664 mike 1.13 }
665
666 if (*p == '>')
667 {
|
668 david.dillard 1.32 *p++ = '\0';
669 return true;
|
670 mike 1.13 }
671
672 if (p[0] == '/' && p[1] == '>')
673 {
|
674 david.dillard 1.32 openCloseElement = true;
675 *p = '\0';
676 p += 2;
677 return true;
|
678 mike 1.13 }
679
680 return false;
681 }
682
683 void XmlParser::_getAttributeNameAndEqual(char*& p)
684 {
|
685 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
686 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
687 mike 1.35
|
688 kumpf 1.24 p++;
|
689 mike 1.13
|
690 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
691 david.dillard 1.32 p++;
|
692 mike 1.13
693 char* term = p;
694
|
695 mike 1.34 _skipWhitespace(_line, p);
|
696 mike 1.13
697 if (*p != '=')
|
698 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
699 mike 1.13
700 p++;
701
|
702 mike 1.34 _skipWhitespace(_line, p);
|
703 mike 1.13
704 *term = '\0';
705 }
706
707 void XmlParser::_getComment(char*& p)
708 {
709 // Now p points to first non-whitespace character beyond "<--" sequence:
710
711 for (; *p; p++)
712 {
|
713 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
714 {
715 if (p[2] != '>')
716 {
717 throw XmlException(
718 XmlException::MINUS_MINUS_IN_COMMENT, _line);
719 }
720
721 // Find end of comment (excluding whitespace):
722
723 *p = '\0';
724 p += 3;
725 return;
726 }
|
727 mike 1.13 }
728
729 // If it got this far, then the comment is unterminated:
730
731 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
732 }
733
734 void XmlParser::_getCData(char*& p)
735 {
736 // At this point p points one past "<![CDATA[" sequence:
737
738 for (; *p; p++)
739 {
|
740 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
741 {
742 *p = '\0';
743 p += 3;
744 return;
745 }
746 else if (*p == '\n')
747 _line++;
|
748 mike 1.13 }
749
750 // If it got this far, then the comment is unterminated:
751
752 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
753 }
754
755 void XmlParser::_getDocType(char*& p)
756 {
757 // Just ignore the DOCTYPE command for now:
758
759 for (; *p && *p != '>'; p++)
760 {
|
761 david.dillard 1.32 if (*p == '\n')
762 _line++;
|
763 mike 1.13 }
764
765 if (*p != '>')
|
766 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
767 mike 1.13
768 p++;
769 }
770
771 void XmlParser::_getElement(char*& p, XmlEntry& entry)
772 {
773 entry.attributeCount = 0;
774
775 //--------------------------------------------------------------------------
776 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
777 //--------------------------------------------------------------------------
778
779 if (*p == '?')
780 {
|
781 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
782 entry.text = ++p;
|
783 mike 1.13
|
784 david.dillard 1.32 Boolean openCloseElement = false;
|
785 mike 1.13
|
786 david.dillard 1.32 if (_getElementName(p))
787 return;
|
788 mike 1.13 }
789 else if (*p == '!')
790 {
|
791 david.dillard 1.32 p++;
|
792 mike 1.13
|
793 david.dillard 1.32 // Expect a comment or CDATA:
|
794 mike 1.13
|
795 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
796 {
797 p += 2;
798 entry.type = XmlEntry::COMMENT;
799 entry.text = p;
800 _getComment(p);
801 return;
802 }
803 else if (memcmp(p, "[CDATA[", 7) == 0)
804 {
805 p += 7;
806 entry.type = XmlEntry::CDATA;
807 entry.text = p;
808 _getCData(p);
809 return;
810 }
811 else if (memcmp(p, "DOCTYPE", 7) == 0)
812 {
813 entry.type = XmlEntry::DOCTYPE;
|
814 kumpf 1.37 entry.text = "";
|
815 david.dillard 1.32 _getDocType(p);
816 return;
817 }
818 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
819 mike 1.13 }
820 else if (*p == '/')
821 {
|
822 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
823 entry.text = ++p;
|
824 mike 1.13
|
825 david.dillard 1.32 if (!_getElementName(p))
826 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
827 mike 1.13
|
828 david.dillard 1.32 return;
|
829 mike 1.13 }
|
830 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
831 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
832 (*p == '_')))
|
833 mike 1.13 {
|
834 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
835 entry.text = p;
|
836 mike 1.13
|
837 david.dillard 1.32 Boolean openCloseElement = false;
|
838 mike 1.13
|
839 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
840 {
841 if (openCloseElement)
842 entry.type = XmlEntry::EMPTY_TAG;
843 return;
844 }
|
845 mike 1.13 }
846 else
|
847 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
848 mike 1.13
849 //--------------------------------------------------------------------------
850 // Grab all the attributes:
851 //--------------------------------------------------------------------------
852
853 for (;;)
854 {
|
855 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
856 {
857 if (p[0] == '?' && p[1] == '>')
858 {
859 p += 2;
860 return;
861 }
862 }
863 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
864 {
865 entry.type = XmlEntry::EMPTY_TAG;
866 p += 2;
867 return;
868 }
869 else if (*p == '>')
870 {
871 p++;
872 return;
873 }
874
875 XmlAttribute attr;
876 david.dillard 1.32 attr.name = p;
877 _getAttributeNameAndEqual(p);
878
|
879 kumpf 1.37 // Get the attribute value (e.g., "some value")
880 {
881 if ((*p != '"') && (*p != '\''))
882 {
883 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
884 }
885
886 char quote = *p++;
887
888 char* start;
889 _normalize(_line, p, quote, start);
890 attr.value = start;
891
892 if (*p != quote)
893 {
894 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
895 }
896
897 // Overwrite the closing quote with a null-terminator:
|
898 david.dillard 1.32
|
899 kumpf 1.37 *p++ = '\0';
900 }
|
901 david.dillard 1.32
902 if (entry.type == XmlEntry::XML_DECLARATION)
903 {
904 // The next thing must a space or a "?>":
|
905 mike 1.13
|
906 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
907 {
908 throw XmlException(
909 XmlException::BAD_ATTRIBUTE_VALUE, _line);
910 }
911 }
912 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
913 {
914 // The next thing must be a space or a '>':
|
915 mike 1.13
|
916 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
917 }
|
918 mike 1.13
|
919 mike 1.34 _skipWhitespace(_line, p);
|
920 david.dillard 1.32
921 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
922 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
923
924 entry.attributes[entry.attributeCount++] = attr;
|
925 mike 1.13 }
926 }
927
// Printable name for each XmlEntry type; XmlEntry::print() indexes this
// table directly with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
939
940 void XmlEntry::print() const
941 {
942 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
943
944 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
945
946 if (needQuotes)
|
947 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
948
|
949 mike 1.13 _printValue(text);
950
951 if (needQuotes)
|
952 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
953 mike 1.13
954 PEGASUS_STD(cout) << '\n';
955
956 for (Uint32 i = 0; i < attributeCount; i++)
957 {
|
958 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
959 _printValue(attributes[i].value);
960 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
961 mike 1.13 }
962 }
963
964 const XmlAttribute* XmlEntry::findAttribute(
965 const char* name) const
966 {
967 for (Uint32 i = 0; i < attributeCount; i++)
968 {
|
969 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
970 return &attributes[i];
|
971 mike 1.13 }
972
973 return 0;
974 }
975
976 // Find first non-whitespace character (set first) and last non-whitespace
977 // character (set last one past this). For example, consider this string:
978 //
|
979 david.dillard 1.32 // " 87 "
|
980 mike 1.13 //
981 // The first pointer would point to '8' and the last pointer woudl point one
982 // beyond '7'.
983
984 static void _findEnds(
|
985 david.dillard 1.32 const char* str,
986 const char*& first,
|
987 mike 1.13 const char*& last)
988 {
989 first = str;
990
|
991 chuck 1.26 while (_isspace(*first))
|
992 david.dillard 1.32 first++;
|
993 mike 1.13
994 if (!*first)
995 {
|
996 david.dillard 1.32 last = first;
997 return;
|
998 mike 1.13 }
999
1000 last = first + strlen(first);
1001
|
1002 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1003 david.dillard 1.32 last--;
|
1004 mike 1.13 }
1005
1006 Boolean XmlEntry::getAttributeValue(
|
1007 david.dillard 1.32 const char* name,
|
1008 mike 1.13 Uint32& value) const
1009 {
1010 const XmlAttribute* attr = findAttribute(name);
1011
1012 if (!attr)
|
1013 david.dillard 1.32 return false;
|
1014 mike 1.13
1015 const char* first;
1016 const char* last;
1017 _findEnds(attr->value, first, last);
1018
1019 char* end = 0;
1020 long tmp = strtol(first, &end, 10);
1021
1022 if (!end || end != last)
|
1023 david.dillard 1.32 return false;
|
1024 mike 1.13
1025 value = Uint32(tmp);
1026 return true;
1027 }
1028
1029 Boolean XmlEntry::getAttributeValue(
|
1030 david.dillard 1.32 const char* name,
|
1031 mike 1.13 Real32& value) const
1032 {
1033 const XmlAttribute* attr = findAttribute(name);
1034
1035 if (!attr)
|
1036 david.dillard 1.32 return false;
|
1037 mike 1.13
1038 const char* first;
1039 const char* last;
1040 _findEnds(attr->value, first, last);
1041
1042 char* end = 0;
1043 double tmp = strtod(first, &end);
1044
1045 if (!end || end != last)
|
1046 david.dillard 1.32 return false;
|
1047 mike 1.13
|
1048 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1049 mike 1.13 return true;
1050 }
1051
1052 Boolean XmlEntry::getAttributeValue(
|
1053 david.dillard 1.32 const char* name,
|
1054 mike 1.13 const char*& value) const
1055 {
1056 const XmlAttribute* attr = findAttribute(name);
1057
1058 if (!attr)
|
1059 david.dillard 1.32 return false;
|
1060 mike 1.13
1061 value = attr->value;
1062 return true;
1063 }
1064
1065 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1066 {
1067 const char* tmp;
1068
1069 if (!getAttributeValue(name, tmp))
|
1070 david.dillard 1.32 return false;
|
1071 mike 1.13
|
1072 chuck 1.28 value = String(tmp);
|
1073 mike 1.13 return true;
1074 }
1075
|
1076 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1077 mike 1.13 {
|
1078 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1079 mike 1.13 }
1080
1081 PEGASUS_NAMESPACE_END
|