1 karl 1.38 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.31 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.38 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.13 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.38 //
|
21 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 // Author: Mike Brasher (mbrasher@bmc.com)
33 //
|
34 david.dillard 1.30 // Modified By: David Dillard, VERITAS Software Corp.
35 // (david.dillard@veritas.com)
|
36 mike 1.13 //
37 //%/////////////////////////////////////////////////////////////////////////////
38
39 ////////////////////////////////////////////////////////////////////////////////
40 //
41 // XmlParser
42 //
|
// This file contains a simple non-validating XML parser. Here are
// several rules for well-formed XML:
|
45 mike 1.13 //
|
46 david.dillard 1.32 // 1. Documents must begin with an XML declaration:
|
47 mike 1.13 //
|
48 david.dillard 1.32 // <?xml version="1.0" standalone="yes"?>
|
49 mike 1.13 //
|
50 david.dillard 1.32 // 2. Comments have the form:
|
51 mike 1.13 //
|
52 david.dillard 1.32 // <!-- blah blah blah -->
|
53 mike 1.13 //
|
54 david.dillard 1.32 // 3. The following entity references are supported:
|
55 mike 1.13 //
|
// &amp; - ampersand
// &lt; - less-than
// &gt; - greater-than
// &quot; - full quote
// &apos; - apostrophe
|
61 mike 1.13 //
|
62 kumpf 1.18 // as well as character (numeric) references:
|
63 mike 1.35 //
|
// &#49; - decimal reference for character '1'
// &#x31; - hexadecimal reference for character '1'
66 //
|
67 david.dillard 1.32 // 4. Element names and attribute names take the following form:
|
68 mike 1.13 //
|
// [A-Za-z_][A-Za-z_0-9-.:]*
|
70 mike 1.13 //
|
71 david.dillard 1.32 // 5. Arbitrary data (CDATA) can be enclosed like this:
|
72 mike 1.13 //
|
73 david.dillard 1.32 // <![CDATA[
74 // ...
75 // ]]>
|
76 mike 1.13 //
|
77 david.dillard 1.32 // 6. Element names and attributes names are case-sensitive.
|
78 mike 1.13 //
|
79 david.dillard 1.32 // 7. XmlAttribute values must be delimited by full or half quotes.
80 // XmlAttribute values must be delimited.
|
81 mike 1.13 //
|
82 david.dillard 1.32 // 8. <!DOCTYPE...>
|
83 mike 1.13 //
84 // TODO:
85 //
|
86 mike 1.35 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
87 // work. Handle <!DOCTYPE...> sections which are complicated (containing
|
88 mike 1.13 // rules rather than references to files).
89 //
|
90 david.dillard 1.32 // Remove newlines from string literals:
|
91 mike 1.13 //
92 // Example: <xyz x="hello
|
93 david.dillard 1.32 // world">
|
94 mike 1.13 //
95 ////////////////////////////////////////////////////////////////////////////////
96
|
97 sage 1.14 #include <Pegasus/Common/Config.h>
|
98 mike 1.13 #include <cctype>
99 #include <cstdio>
100 #include <cstdlib>
101 #include <cstring>
102 #include "XmlParser.h"
103 #include "Logger.h"
|
104 chuck 1.19 #include "ExceptionRep.h"
|
105 mike 1.34 #include "CharSet.h"
|
106 mike 1.13
107 PEGASUS_NAMESPACE_BEGIN
108
109 ////////////////////////////////////////////////////////////////////////////////
110 //
111 // Static helper functions
112 //
113 ////////////////////////////////////////////////////////////////////////////////
114
115 static void _printValue(const char* p)
116 {
117 for (; *p; p++)
118 {
|
119 david.dillard 1.32 if (*p == '\n')
120 PEGASUS_STD(cout) << "\\n";
121 else if (*p == '\r')
122 PEGASUS_STD(cout) << "\\r";
123 else if (*p == '\t')
124 PEGASUS_STD(cout) << "\\t";
125 else
126 PEGASUS_STD(cout) << *p;
|
127 mike 1.13 }
128 }
129
130 struct EntityReference
131 {
132 const char* match;
133 Uint32 length;
134 char replacement;
135 };
136
|
137 kumpf 1.18 // ATTN: Add support for more entity references
|
138 mike 1.13 static EntityReference _references[] =
139 {
140 { "&", 5, '&' },
141 { "<", 4, '<' },
142 { ">", 4, '>' },
143 { """, 6, '"' },
144 { "'", 6, '\'' }
145 };
146
|
147 chuck 1.26
148 // Implements a check for a whitespace character, without calling
149 // isspace( ). The isspace( ) function is locale-sensitive,
150 // and incorrectly flags some chars above 0x7f as whitespace. This
151 // causes the XmlParser to incorrectly parse UTF-8 data.
152 //
153 // Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
154 // defines white space as:
|
155 david.dillard 1.32 // S ::= (#x20 | #x9 | #xD | #xA)+
|
156 mike 1.34 static inline int _isspace(char c)
|
157 chuck 1.26 {
|
158 kumpf 1.36 return CharSet::isXmlWhiteSpace((Uint8)c);
|
159 chuck 1.26 }
160
|
161 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
162
163 ////////////////////////////////////////////////////////////////////////////////
164 //
165 // XmlException
166 //
167 ////////////////////////////////////////////////////////////////////////////////
168
// Default (untranslated) message text for each XmlException code. The table
// is indexed with (code - 1) and must stay in the same order as _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
189
|
// Message keys, one per XmlException code. The table is indexed with
// (code - 1), in parallel with _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",               //  1
    "Common.XmlParser.BAD_END_TAG",                 //  2
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",          //  3
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",         //  4
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",         //  5
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",      //  6
    "Common.XmlParser.UNTERMINATED_COMMENT",        //  7
    "Common.XmlParser.UNTERMINATED_CDATA",          //  8
    "Common.XmlParser.UNTERMINATED_DOCTYPE",        //  9
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",         // 10
    "Common.XmlParser.MALFORMED_REFERENCE",         // 11
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",   // 12
    "Common.XmlParser.START_END_MISMATCH",          // 13
    "Common.XmlParser.UNCLOSED_TAGS",               // 14
    "Common.XmlParser.MULTIPLE_ROOTS",              // 15
    "Common.XmlParser.VALIDATION_ERROR",            // 16
    "Common.XmlParser.SEMANTIC_ERROR"               // 17
};
210
|
211 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
212 chuck 1.19 /*
|
213 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
214 {
215 String result = _xmlMessages[Uint32(code) - 1];
216
217 char buffer[32];
218 sprintf(buffer, "%d", line);
219 result.append(": on line ");
220 result.append(buffer);
221
222 if (message.size())
223 {
|
224 david.dillard 1.32 result.append(": ");
225 result.append(message);
|
226 mike 1.13 }
227
228 return result;
229 }
|
230 chuck 1.19 */
231
232 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
233 {
234 String dftMsg = _xmlMessages[Uint32(code) - 1];
235 String key = _xmlKeys[Uint32(code) - 1];
|
236 david.dillard 1.32 String msg = message;
|
237 chuck 1.19
238 dftMsg.append(": on line $0");
239 if (message.size())
240 {
|
241 david.dillard 1.32 msg = ": " + msg;
242 dftMsg.append("$1");
243 }
|
244 chuck 1.19
245 return MessageLoaderParms(key, dftMsg, line ,msg);
246 }
247
248 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
249 {
250 String dftMsg = _xmlMessages[Uint32(code) - 1];
251 String key = _xmlKeys[Uint32(code) - 1];
252
253 dftMsg.append(": on line $0");
|
254 david.dillard 1.32
|
255 chuck 1.19 return MessageLoaderParms(key, dftMsg, line);
256 }
257
|
258 mike 1.13
259 XmlException::XmlException(
|
260 david.dillard 1.32 XmlException::Code code,
|
261 mike 1.13 Uint32 lineNumber,
|
262 david.dillard 1.32 const String& message)
|
263 mike 1.13 : Exception(_formMessage(code, lineNumber, message))
264 {
265
266 }
267
|
268 chuck 1.19
269 XmlException::XmlException(
|
270 david.dillard 1.32 XmlException::Code code,
|
271 chuck 1.19 Uint32 lineNumber,
|
272 david.dillard 1.32 MessageLoaderParms& msgParms)
|
273 chuck 1.19 : Exception(_formPartialMessage(code, lineNumber))
274 {
|
275 david.dillard 1.32 if (msgParms.default_msg.size())
|
276 humberto 1.21 {
|
277 david.dillard 1.32 msgParms.default_msg = ": " + msgParms.default_msg;
278 }
279 _rep->message.append(MessageLoader::getMessage(msgParms));
|
280 chuck 1.19 }
281
282
|
283 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
284 //
285 // XmlValidationError
286 //
287 ////////////////////////////////////////////////////////////////////////////////
288
289 XmlValidationError::XmlValidationError(
290 Uint32 lineNumber,
291 const String& message)
292 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
293 {
294
295 }
296
|
297 chuck 1.19
298 XmlValidationError::XmlValidationError(
299 Uint32 lineNumber,
300 MessageLoaderParms& msgParms)
301 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
302 {
303
304 }
305
306
|
307 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
308 //
309 // XmlSemanticError
310 //
311 ////////////////////////////////////////////////////////////////////////////////
312
313 XmlSemanticError::XmlSemanticError(
314 Uint32 lineNumber,
315 const String& message)
316 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
317 {
318
319 }
|
320 chuck 1.19
321
322 XmlSemanticError::XmlSemanticError(
323 Uint32 lineNumber,
324 MessageLoaderParms& msgParms)
325 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
326 {
327
328 }
329
|
330 mike 1.13
331 ////////////////////////////////////////////////////////////////////////////////
332 //
333 // XmlParser
334 //
335 ////////////////////////////////////////////////////////////////////////////////
336
|
337 david.dillard 1.32 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
|
338 mike 1.13 _restoreChar('\0'), _foundRoot(false)
339 {
340
341 }
342
|
343 mike 1.34 inline void _skipWhitespace(Uint32& line, char*& p)
344 {
345 while (*p && _isspace(*p))
346 {
347 if (*p == '\n')
348 line++;
349
350 p++;
351 }
352 }
353
|
// Decodes one of the five predefined entity references. On entry p points
// just past the '&'. When the text at p is "gt;", "lt;", "apos;", "quot;" or
// "amp;", p is advanced past the ';' and the referenced character is
// returned. Otherwise p is left untouched and -1 is returned.
static int _getEntityRef(char*& p)
{
    struct NamedEntity
    {
        const char* tail;       // text expected after the '&'
        unsigned int tailSize;  // number of characters in tail
        char value;             // character the reference denotes
    };

    static const NamedEntity knownEntities[] =
    {
        { "gt;", 3, '>' },
        { "lt;", 3, '<' },
        { "apos;", 5, '\'' },
        { "quot;", 5, '"' },
        { "amp;", 4, '&' }
    };

    const unsigned int count =
        sizeof(knownEntities) / sizeof(knownEntities[0]);

    for (unsigned int i = 0; i < count; i++)
    {
        const NamedEntity& candidate = knownEntities[i];

        // strncmp stops at a null terminator, so a truncated reference
        // never causes a read beyond the end of the text.
        if (strncmp(p, candidate.tail, candidate.tailSize) == 0)
        {
            p += candidate.tailSize;
            return candidate.value;
        }
    }

    return -1;
}
390
// Decodes the numeric part of a character reference. On entry p points at
// the first digit: just past "&#" for a decimal reference, or just past
// "&#x" when hex is true.
//
// A valid reference consists of one or more digits (at most 4 hexadecimal or
// 5 decimal digits), terminated by ';', whose value does not exceed 255. On
// success p is advanced past the ';' and the character value is returned.
// On any failure p is left untouched and -1 is returned.
//
// The digits are scanned by hand rather than with strtoul(), because
// strtoul() also accepts leading whitespace, a '+' or '-' sign and (for base
// 16) a "0x" prefix, none of which may appear in a character reference.
static inline int _getCharRef(char*& p, bool hex)
{
    const unsigned long base = hex ? 16 : 10;
    const int maxDigits = hex ? 4 : 5;

    unsigned long value = 0;
    char* cursor = p;

    for (;; cursor++)
    {
        const char c = *cursor;
        unsigned long digit;

        if (c >= '0' && c <= '9')
            digit = (unsigned long)(c - '0');
        else if (hex && c >= 'a' && c <= 'f')
            digit = (unsigned long)(c - 'a') + 10;
        else if (hex && c >= 'A' && c <= 'F')
            digit = (unsigned long)(c - 'A') + 10;
        else
            break;

        // Too many digits. This check also keeps value from overflowing.
        if (cursor - p >= maxDigits)
            return -1;

        value = value * base + digit;
    }

    if ((cursor == p) || (*cursor != ';') || (value > 255))
    {
        return -1;
    }

    p = cursor + 1;

    return int(value);
}
419
420 static void _normalize(Uint32& line, char*& p, char end_char, char*& start)
421 {
422 // Skip over leading whitespace:
423
424 _skipWhitespace(line, p);
425 start = p;
426
427 // Process one character at a time:
428
429 char* q = p;
430
431 while (*p && (*p != end_char))
432 {
433 if (_isspace(*p))
434 {
435 // Compress sequences of whitespace characters to a single space
436 // character. Update line number when newlines encountered.
437
438 kumpf 1.37 if (*p++ == '\n')
439 {
440 line++;
441 }
442
443 *q++ = ' ';
444
445 _skipWhitespace(line, p);
446 }
447 else if (*p == '&')
448 {
449 // Process entity characters and entity references:
450
451 p++;
452 int ch;
453
454 if (*p == '#')
455 {
456 *p++;
457
458 if (*p == 'x')
459 kumpf 1.37 {
460 p++;
461 ch = _getCharRef(p, true);
462 }
463 else
464 {
465 ch = _getCharRef(p, false);
466 }
467 }
468 else
469 {
470 ch = _getEntityRef(p);
471 }
472
473 if (ch == -1)
474 {
475 throw XmlException(XmlException::MALFORMED_REFERENCE, line);
476 }
477
478 *q++ = ch;
479 }
480 kumpf 1.37 else
481 {
482 *q++ = *p++;
483 }
484 }
485
486 // We encountered a the end_char or a zero-terminator.
487
488 *q = *p;
489
490 // Remove single trailing whitespace (consecutive whitespaces already
491 // compressed above). Since p >= q, we can tell if we need to strip a
492 // trailing space from q by looking at the end of p. We must not look at
493 // the last character of p, though, if p is an empty string.
494
495 if ((p != start) && _isspace(p[-1]))
496 {
497 q--;
498 }
499
500 // If q got behind p, it is safe and necessary to null-terminate q
501 kumpf 1.37
502 if (q != p)
503 {
504 *q = '\0';
505 }
506 }
507
|
508 mike 1.13 Boolean XmlParser::next(XmlEntry& entry)
509 {
510 if (!_putBackStack.isEmpty())
511 {
|
512 david.dillard 1.32 entry = _putBackStack.top();
513 _putBackStack.pop();
514 return true;
|
515 mike 1.13 }
516
517 // If a character was overwritten with a null-terminator the last
518 // time this routine was called, then put back that character. Before
519 // exiting of course, restore the null-terminator.
520
521 char* nullTerminator = 0;
522
523 if (_restoreChar && !*_current)
524 {
|
525 david.dillard 1.32 nullTerminator = _current;
526 *_current = _restoreChar;
527 _restoreChar = '\0';
|
528 mike 1.13 }
529
530 // Skip over any whitespace:
531
|
532 mike 1.34 _skipWhitespace(_line, _current);
|
533 mike 1.13
534 if (!*_current)
535 {
|
536 david.dillard 1.32 if (nullTerminator)
537 *nullTerminator = '\0';
|
538 mike 1.13
|
539 david.dillard 1.32 if (!_stack.isEmpty())
540 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
|
541 mike 1.13
|
542 david.dillard 1.32 return false;
|
543 mike 1.13 }
544
545 // Either a "<...>" or content begins next:
546
547 if (*_current == '<')
548 {
|
549 david.dillard 1.32 _current++;
550 _getElement(_current, entry);
|
551 mike 1.13
|
552 david.dillard 1.32 if (nullTerminator)
553 *nullTerminator = '\0';
|
554 mike 1.13
|
555 david.dillard 1.32 if (entry.type == XmlEntry::START_TAG)
556 {
557 if (_stack.isEmpty() && _foundRoot)
558 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
559
560 _foundRoot = true;
561 _stack.push((char*)entry.text);
562 }
563 else if (entry.type == XmlEntry::END_TAG)
564 {
565 if (_stack.isEmpty())
566 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
567 mike 1.13
|
568 david.dillard 1.32 if (strcmp(_stack.top(), entry.text) != 0)
569 throw XmlException(XmlException::START_END_MISMATCH, _line);
|
570 mike 1.13
|
571 david.dillard 1.32 _stack.pop();
572 }
|
573 mike 1.13
|
574 david.dillard 1.32 return true;
|
575 mike 1.13 }
576 else
577 {
|
578 kumpf 1.37 // Normalize the content:
579
580 char* start;
581 _normalize(_line, _current, '<', start);
582
583 // Get the content:
584
|
585 david.dillard 1.32 entry.type = XmlEntry::CONTENT;
|
586 kumpf 1.37 entry.text = start;
587
588 // Overwrite '<' with a null character (temporarily).
589
|
590 david.dillard 1.32 _restoreChar = *_current;
591 *_current = '\0';
|
592 mike 1.13
|
593 david.dillard 1.32 if (nullTerminator)
594 *nullTerminator = '\0';
|
595 mike 1.13
|
596 david.dillard 1.32 return true;
|
597 mike 1.13 }
598 }
599
600 void XmlParser::putBack(XmlEntry& entry)
601 {
602 _putBackStack.push(entry);
603 }
604
605 XmlParser::~XmlParser()
606 {
607 // Nothing to do!
608 }
609
|
// Lookup table for the characters allowed after the first character of an
// element or attribute name: A-Z a-z 0-9 _ - : .
//
// The table is indexed by an unsigned byte value, so it is declared with an
// explicit bound of 256; any entry not listed is zero-initialized, and every
// possible index is guaranteed to be in range. Each row below covers 16
// consecutive character codes.
static unsigned char _isInnerElementChar[256] =
{
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x00 - 0x0F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x10 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,    // 0x20 - 0x2F   '-' '.'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,    // 0x30 - 0x3F   '0'-'9' ':'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0x40 - 0x4F   'A'-'O'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,    // 0x50 - 0x5F   'P'-'Z' '_'
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    // 0x60 - 0x6F   'a'-'o'
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,    // 0x70 - 0x7F   'p'-'z'
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x80 - 0x8F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0x90 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xA0 - 0xAF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xB0 - 0xBF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xC0 - 0xCF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xD0 - 0xDF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,    // 0xE0 - 0xEF
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0     // 0xF0 - 0xFF
};
621
|
622 mike 1.13 Boolean XmlParser::_getElementName(char*& p)
623 {
|
624 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
625 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
626 mike 1.35
|
627 kumpf 1.24 p++;
|
628 mike 1.13
|
629 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
630 david.dillard 1.32 p++;
|
631 mike 1.13
632 // The next character must be a space:
633
|
634 chuck 1.26 if (_isspace(*p))
|
635 mike 1.13 {
|
636 david.dillard 1.32 *p++ = '\0';
|
637 mike 1.34 _skipWhitespace(_line, p);
|
638 mike 1.13 }
639
640 if (*p == '>')
641 {
|
642 david.dillard 1.32 *p++ = '\0';
643 return true;
|
644 mike 1.13 }
645
646 return false;
647 }
648
649 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
650 {
651 openCloseElement = false;
652
|
653 mike 1.35 if (!CharSet::isAlNumUnder(Uint8(*p)))
|
654 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
655 mike 1.35
|
656 kumpf 1.24 p++;
|
657 mike 1.13
|
658 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
659 david.dillard 1.32 p++;
|
660 mike 1.13
661 // The next character must be a space:
662
|
663 chuck 1.26 if (_isspace(*p))
|
664 mike 1.13 {
|
665 david.dillard 1.32 *p++ = '\0';
|
666 mike 1.34 _skipWhitespace(_line, p);
|
667 mike 1.13 }
668
669 if (*p == '>')
670 {
|
671 david.dillard 1.32 *p++ = '\0';
672 return true;
|
673 mike 1.13 }
674
675 if (p[0] == '/' && p[1] == '>')
676 {
|
677 david.dillard 1.32 openCloseElement = true;
678 *p = '\0';
679 p += 2;
680 return true;
|
681 mike 1.13 }
682
683 return false;
684 }
685
686 void XmlParser::_getAttributeNameAndEqual(char*& p)
687 {
|
688 mike 1.35 if (!CharSet::isAlNumUnder((Uint8)*p))
|
689 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
690 mike 1.35
|
691 kumpf 1.24 p++;
|
692 mike 1.13
|
693 mike 1.35 while (*p && _isInnerElementChar[Uint8(*p)])
|
694 david.dillard 1.32 p++;
|
695 mike 1.13
696 char* term = p;
697
|
698 mike 1.34 _skipWhitespace(_line, p);
|
699 mike 1.13
700 if (*p != '=')
|
701 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
702 mike 1.13
703 p++;
704
|
705 mike 1.34 _skipWhitespace(_line, p);
|
706 mike 1.13
707 *term = '\0';
708 }
709
710 void XmlParser::_getComment(char*& p)
711 {
712 // Now p points to first non-whitespace character beyond "<--" sequence:
713
714 for (; *p; p++)
715 {
|
716 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
717 {
718 if (p[2] != '>')
719 {
720 throw XmlException(
721 XmlException::MINUS_MINUS_IN_COMMENT, _line);
722 }
723
724 // Find end of comment (excluding whitespace):
725
726 *p = '\0';
727 p += 3;
728 return;
729 }
|
730 mike 1.13 }
731
732 // If it got this far, then the comment is unterminated:
733
734 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
735 }
736
737 void XmlParser::_getCData(char*& p)
738 {
739 // At this point p points one past "<![CDATA[" sequence:
740
741 for (; *p; p++)
742 {
|
743 david.dillard 1.32 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
744 {
745 *p = '\0';
746 p += 3;
747 return;
748 }
749 else if (*p == '\n')
750 _line++;
|
751 mike 1.13 }
752
753 // If it got this far, then the comment is unterminated:
754
755 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
756 }
757
758 void XmlParser::_getDocType(char*& p)
759 {
760 // Just ignore the DOCTYPE command for now:
761
762 for (; *p && *p != '>'; p++)
763 {
|
764 david.dillard 1.32 if (*p == '\n')
765 _line++;
|
766 mike 1.13 }
767
768 if (*p != '>')
|
769 david.dillard 1.32 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
|
770 mike 1.13
771 p++;
772 }
773
774 void XmlParser::_getElement(char*& p, XmlEntry& entry)
775 {
776 entry.attributeCount = 0;
777
778 //--------------------------------------------------------------------------
779 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
780 //--------------------------------------------------------------------------
781
782 if (*p == '?')
783 {
|
784 david.dillard 1.32 entry.type = XmlEntry::XML_DECLARATION;
785 entry.text = ++p;
|
786 mike 1.13
|
787 david.dillard 1.32 Boolean openCloseElement = false;
|
788 mike 1.13
|
789 david.dillard 1.32 if (_getElementName(p))
790 return;
|
791 mike 1.13 }
792 else if (*p == '!')
793 {
|
794 david.dillard 1.32 p++;
|
795 mike 1.13
|
796 david.dillard 1.32 // Expect a comment or CDATA:
|
797 mike 1.13
|
798 david.dillard 1.32 if (p[0] == '-' && p[1] == '-')
799 {
800 p += 2;
801 entry.type = XmlEntry::COMMENT;
802 entry.text = p;
803 _getComment(p);
804 return;
805 }
806 else if (memcmp(p, "[CDATA[", 7) == 0)
807 {
808 p += 7;
809 entry.type = XmlEntry::CDATA;
810 entry.text = p;
811 _getCData(p);
812 return;
813 }
814 else if (memcmp(p, "DOCTYPE", 7) == 0)
815 {
816 entry.type = XmlEntry::DOCTYPE;
|
817 kumpf 1.37 entry.text = "";
|
818 david.dillard 1.32 _getDocType(p);
819 return;
820 }
821 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
|
822 mike 1.13 }
823 else if (*p == '/')
824 {
|
825 david.dillard 1.32 entry.type = XmlEntry::END_TAG;
826 entry.text = ++p;
|
827 mike 1.13
|
828 david.dillard 1.32 if (!_getElementName(p))
829 throw(XmlException(XmlException::BAD_END_TAG, _line));
|
830 mike 1.13
|
831 david.dillard 1.32 return;
|
832 mike 1.13 }
|
833 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
834 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
835 (*p == '_')))
|
836 mike 1.13 {
|
837 david.dillard 1.32 entry.type = XmlEntry::START_TAG;
838 entry.text = p;
|
839 mike 1.13
|
840 david.dillard 1.32 Boolean openCloseElement = false;
|
841 mike 1.13
|
842 david.dillard 1.32 if (_getOpenElementName(p, openCloseElement))
843 {
844 if (openCloseElement)
845 entry.type = XmlEntry::EMPTY_TAG;
846 return;
847 }
|
848 mike 1.13 }
849 else
|
850 david.dillard 1.32 throw XmlException(XmlException::BAD_START_TAG, _line);
|
851 mike 1.13
852 //--------------------------------------------------------------------------
853 // Grab all the attributes:
854 //--------------------------------------------------------------------------
855
856 for (;;)
857 {
|
858 david.dillard 1.32 if (entry.type == XmlEntry::XML_DECLARATION)
859 {
860 if (p[0] == '?' && p[1] == '>')
861 {
862 p += 2;
863 return;
864 }
865 }
866 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
867 {
868 entry.type = XmlEntry::EMPTY_TAG;
869 p += 2;
870 return;
871 }
872 else if (*p == '>')
873 {
874 p++;
875 return;
876 }
877
878 XmlAttribute attr;
879 david.dillard 1.32 attr.name = p;
880 _getAttributeNameAndEqual(p);
881
|
882 kumpf 1.37 // Get the attribute value (e.g., "some value")
883 {
884 if ((*p != '"') && (*p != '\''))
885 {
886 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
887 }
888
889 char quote = *p++;
890
891 char* start;
892 _normalize(_line, p, quote, start);
893 attr.value = start;
894
895 if (*p != quote)
896 {
897 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
898 }
899
900 // Overwrite the closing quote with a null-terminator:
|
901 david.dillard 1.32
|
902 kumpf 1.37 *p++ = '\0';
903 }
|
904 david.dillard 1.32
905 if (entry.type == XmlEntry::XML_DECLARATION)
906 {
907 // The next thing must a space or a "?>":
|
908 mike 1.13
|
909 david.dillard 1.32 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
910 {
911 throw XmlException(
912 XmlException::BAD_ATTRIBUTE_VALUE, _line);
913 }
914 }
915 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
916 {
917 // The next thing must be a space or a '>':
|
918 mike 1.13
|
919 david.dillard 1.32 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
920 }
|
921 mike 1.13
|
922 mike 1.34 _skipWhitespace(_line, p);
|
923 david.dillard 1.32
924 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
925 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
926
927 entry.attributes[entry.attributeCount++] = attr;
|
928 mike 1.13 }
929 }
930
// Printable names for the XmlEntry types; XmlEntry::print() indexes this
// table with the entry's type value.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",  // 0
    "START_TAG",        // 1
    "EMPTY_TAG",        // 2
    "END_TAG",          // 3
    "COMMENT",          // 4
    "CDATA",            // 5
    "DOCTYPE",          // 6
    "CONTENT"           // 7
};
942
943 void XmlEntry::print() const
944 {
945 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
946
947 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
948
949 if (needQuotes)
|
950 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
951
|
952 mike 1.13 _printValue(text);
953
954 if (needQuotes)
|
955 david.dillard 1.32 PEGASUS_STD(cout) << "\"";
|
956 mike 1.13
957 PEGASUS_STD(cout) << '\n';
958
959 for (Uint32 i = 0; i < attributeCount; i++)
960 {
|
961 david.dillard 1.32 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
962 _printValue(attributes[i].value);
963 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
|
964 mike 1.13 }
965 }
966
967 const XmlAttribute* XmlEntry::findAttribute(
968 const char* name) const
969 {
970 for (Uint32 i = 0; i < attributeCount; i++)
971 {
|
972 david.dillard 1.32 if (strcmp(attributes[i].name, name) == 0)
973 return &attributes[i];
|
974 mike 1.13 }
975
976 return 0;
977 }
978
979 // Find first non-whitespace character (set first) and last non-whitespace
980 // character (set last one past this). For example, consider this string:
981 //
|
982 david.dillard 1.32 // " 87 "
|
983 mike 1.13 //
984 // The first pointer would point to '8' and the last pointer woudl point one
985 // beyond '7'.
986
987 static void _findEnds(
|
988 david.dillard 1.32 const char* str,
989 const char*& first,
|
990 mike 1.13 const char*& last)
991 {
992 first = str;
993
|
994 chuck 1.26 while (_isspace(*first))
|
995 david.dillard 1.32 first++;
|
996 mike 1.13
997 if (!*first)
998 {
|
999 david.dillard 1.32 last = first;
1000 return;
|
1001 mike 1.13 }
1002
1003 last = first + strlen(first);
1004
|
1005 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1006 david.dillard 1.32 last--;
|
1007 mike 1.13 }
1008
1009 Boolean XmlEntry::getAttributeValue(
|
1010 david.dillard 1.32 const char* name,
|
1011 mike 1.13 Uint32& value) const
1012 {
1013 const XmlAttribute* attr = findAttribute(name);
1014
1015 if (!attr)
|
1016 david.dillard 1.32 return false;
|
1017 mike 1.13
1018 const char* first;
1019 const char* last;
1020 _findEnds(attr->value, first, last);
1021
1022 char* end = 0;
1023 long tmp = strtol(first, &end, 10);
1024
1025 if (!end || end != last)
|
1026 david.dillard 1.32 return false;
|
1027 mike 1.13
1028 value = Uint32(tmp);
1029 return true;
1030 }
1031
1032 Boolean XmlEntry::getAttributeValue(
|
1033 david.dillard 1.32 const char* name,
|
1034 mike 1.13 Real32& value) const
1035 {
1036 const XmlAttribute* attr = findAttribute(name);
1037
1038 if (!attr)
|
1039 david.dillard 1.32 return false;
|
1040 mike 1.13
1041 const char* first;
1042 const char* last;
1043 _findEnds(attr->value, first, last);
1044
1045 char* end = 0;
1046 double tmp = strtod(first, &end);
1047
1048 if (!end || end != last)
|
1049 david.dillard 1.32 return false;
|
1050 mike 1.13
|
1051 david.dillard 1.32 value = static_cast<Real32>(tmp);
|
1052 mike 1.13 return true;
1053 }
1054
1055 Boolean XmlEntry::getAttributeValue(
|
1056 david.dillard 1.32 const char* name,
|
1057 mike 1.13 const char*& value) const
1058 {
1059 const XmlAttribute* attr = findAttribute(name);
1060
1061 if (!attr)
|
1062 david.dillard 1.32 return false;
|
1063 mike 1.13
1064 value = attr->value;
1065 return true;
1066 }
1067
1068 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1069 {
1070 const char* tmp;
1071
1072 if (!getAttributeValue(name, tmp))
|
1073 david.dillard 1.32 return false;
|
1074 mike 1.13
|
1075 chuck 1.28 value = String(tmp);
|
1076 mike 1.13 return true;
1077 }
1078
|
1079 mike 1.33 void XmlAppendCString(Buffer& out, const char* str)
|
1080 mike 1.13 {
|
1081 david.dillard 1.32 out.append(str, static_cast<Uint32>(strlen(str)));
|
1082 mike 1.13 }
1083
1084 PEGASUS_NAMESPACE_END
|