1 karl 1.27 //%2003////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.27 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Development
4 // Company, L. P., IBM Corp., The Open Group, Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.;
6 // IBM Corp.; EMC Corporation, The Open Group.
|
7 mike 1.13 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
9 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
10 // deal in the Software without restriction, including without limitation the
11 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
12 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
|
15 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
16 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
17 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
18 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 //
24 //==============================================================================
25 //
26 // Author: Mike Brasher (mbrasher@bmc.com)
27 //
28 // Modified By:
29 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 ////////////////////////////////////////////////////////////////////////////////
33 //
34 // XmlParser
35 //
36 // This file contains a simple non-validating XML parser. Here are
37 // several rules for well-formed XML:
38 //
39 // 1. Documents must begin with an XML declaration:
40 //
41 // <?xml version="1.0" standalone="yes"?>
42 mike 1.13 //
43 // 2. Comments have the form:
44 //
45 // <!-- blah blah blah -->
46 //
47 // 3. The following entity references are supported:
48 //
49 // &amp; - ampersand
50 // &lt; - less-than
51 // &gt; - greater-than
52 // &quot; - full quote
53 // &apos; - apostrophe
54 //
|
55 kumpf 1.18 // as well as character (numeric) references:
56
57 // &#49; - decimal reference for character '1'
58 // &#x31; - hexadecimal reference for character '1'
59 //
|
60 mike 1.13 // 4. Element names and attribute names take the following form:
61 //
62 // [A-Za-z_][A-Za-z_0-9-.:]*
63 //
64 // 5. Arbitrary data (CDATA) can be enclosed like this:
65 //
66 // <![CDATA[
67 // ...
68 // ]]>
69 //
70 // 6. Element names and attributes names are case-sensitive.
71 //
72 // 7. XmlAttribute values must be delimited by full or half quotes.
73 // XmlAttribute values must be delimited.
74 //
75 // 8. <!DOCTYPE...>
76 //
77 // TODO:
78 //
|
79 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
80 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
81 // rules rather than references to files).
82 //
83 // Remove newlines from string literals:
84 //
85 // Example: <xyz x="hello
86 // world">
87 //
88 ////////////////////////////////////////////////////////////////////////////////
89
|
90 sage 1.14 #include <Pegasus/Common/Config.h>
|
91 mike 1.13 #include <cctype>
92 #include <cstdio>
93 #include <cstdlib>
94 #include <cstring>
95 #include "XmlParser.h"
96 #include "Logger.h"
|
97 chuck 1.19 #include "ExceptionRep.h"
|
98 mike 1.13
99 PEGASUS_NAMESPACE_BEGIN
100
101 #define PEGASUS_ARRAY_T XmlEntry
102 # include "ArrayImpl.h"
103 #undef PEGASUS_ARRAY_T
104
105
106 ////////////////////////////////////////////////////////////////////////////////
107 //
108 // Static helper functions
109 //
110 ////////////////////////////////////////////////////////////////////////////////
111
112 static void _printValue(const char* p)
113 {
114 for (; *p; p++)
115 {
116 if (*p == '\n')
117 PEGASUS_STD(cout) << "\\n";
118 else if (*p == '\r')
119 mike 1.13 PEGASUS_STD(cout) << "\\r";
120 else if (*p == '\t')
121 PEGASUS_STD(cout) << "\\t";
122 else
123 PEGASUS_STD(cout) << *p;
124 }
125 }
126
127 struct EntityReference
128 {
129 const char* match;
130 Uint32 length;
131 char replacement;
132 };
133
|
134 kumpf 1.18 // ATTN: Add support for more entity references
|
135 mike 1.13 static EntityReference _references[] =
136 {
137 { "&", 5, '&' },
138 { "<", 4, '<' },
139 { ">", 4, '>' },
140 { """, 6, '"' },
141 { "'", 6, '\'' }
142 };
143
|
144 chuck 1.26
// Reports whether c is a white space character, without calling
// isspace(). The isspace() function is locale-sensitive and incorrectly
// flags some chars above 0x7f as whitespace, which causes the XmlParser
// to incorrectly parse UTF-8 data.
//
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
// defines white space as:
//     S ::= (#x20 | #x9 | #xD | #xA)+
//
// Returns 1 for space, tab, carriage return and line feed; 0 for every
// other character (including the null terminator).
static int _isspace(char c)
{
    switch (c)
    {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            return 1;

        default:
            return 0;
    }
}
159
160
|
161 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
162
163 // Remove all redundant spaces from the given string:
164
165 static void _normalize(char* text)
166 {
167 Uint32 length = strlen(text);
168 char* p = text;
169 char* end = p + length;
170
171 // Remove leading spaces:
172
|
173 chuck 1.26 while (_isspace(*p))
174 p++;
|
175 mike 1.13
176 if (p != text)
177 memmove(text, p, end - p + 1);
178
179 p = text;
180
181 // Look for sequences of more than one space and remove all but one.
182
183 for (;;)
184 {
185 // Advance to the next space:
186
|
187 chuck 1.26 while (*p && !_isspace(*p))
|
188 mike 1.13 p++;
189
190 if (!*p)
191 break;
192
193 // Advance to the next non-space:
194
195 char* q = p++;
196
|
197 chuck 1.26 while (_isspace(*p))
|
198 mike 1.13 p++;
199
200 // Discard trailing spaces (if we are at the end):
201
202 if (!*p)
203 {
204 *q = '\0';
205 break;
206 }
207
208 // Remove the redundant spaces:
209
210 Uint32 n = p - q;
211
212 if (n > 1)
213 {
214 *q++ = ' ';
215 memmove(q, p, end - p + 1);
216 p = q;
217 }
218 }
219 mike 1.13 }
220
221 ////////////////////////////////////////////////////////////////////////////////
222 //
223 // XmlException
224 //
225 ////////////////////////////////////////////////////////////////////////////////
226
// Default (untranslated) message text for each XmlException code. The
// message builders index this table with (code - 1), so the order of the
// entries must stay in step with the exception codes and with the parallel
// _xmlKeys table.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
247
|
// Message-bundle keys, one per XmlException code. Indexed with (code - 1)
// by the message builders, in parallel with the _xmlMessages table of
// default texts; entry N here names the same error as entry N there.
static const char* _xmlKeys[] =
{
    // Malformed tags and attributes
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",

    // Comments, CDATA and DOCTYPE sections
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",

    // Limits and references
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

    // Document structure
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",

    // Errors raised on behalf of callers
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
268
|
269 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
270 chuck 1.19 /*
|
271 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
272 {
273 String result = _xmlMessages[Uint32(code) - 1];
274
275 char buffer[32];
276 sprintf(buffer, "%d", line);
277 result.append(": on line ");
278 result.append(buffer);
279
280 if (message.size())
281 {
282 result.append(": ");
283 result.append(message);
284 }
285
286 return result;
287 }
|
288 chuck 1.19 */
289
290 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
291 {
292 String dftMsg = _xmlMessages[Uint32(code) - 1];
293 String key = _xmlKeys[Uint32(code) - 1];
294 String msg = message;
295
296 dftMsg.append(": on line $0");
297 if (message.size())
298 {
|
299 humberto 1.20 msg = ": " + msg;
|
300 chuck 1.19 dftMsg.append("$1");
301 }
302
303 return MessageLoaderParms(key, dftMsg, line ,msg);
304 }
305
306 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
307 {
308 String dftMsg = _xmlMessages[Uint32(code) - 1];
309 String key = _xmlKeys[Uint32(code) - 1];
310
311 dftMsg.append(": on line $0");
312
313 return MessageLoaderParms(key, dftMsg, line);
314 }
315
|
316 mike 1.13
317 XmlException::XmlException(
318 XmlException::Code code,
319 Uint32 lineNumber,
320 const String& message)
321 : Exception(_formMessage(code, lineNumber, message))
322 {
323
324 }
325
|
326 chuck 1.19
327 XmlException::XmlException(
328 XmlException::Code code,
329 Uint32 lineNumber,
330 MessageLoaderParms& msgParms)
331 : Exception(_formPartialMessage(code, lineNumber))
332 {
|
333 humberto 1.21 if (msgParms.default_msg.size())
334 {
335 msgParms.default_msg = ": " + msgParms.default_msg;
336 }
|
337 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
338 }
339
340
|
341 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
342 //
343 // XmlValidationError
344 //
345 ////////////////////////////////////////////////////////////////////////////////
346
347 XmlValidationError::XmlValidationError(
348 Uint32 lineNumber,
349 const String& message)
350 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
351 {
352
353 }
354
|
355 chuck 1.19
356 XmlValidationError::XmlValidationError(
357 Uint32 lineNumber,
358 MessageLoaderParms& msgParms)
359 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
360 {
361
362 }
363
364
|
365 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
366 //
367 // XmlSemanticError
368 //
369 ////////////////////////////////////////////////////////////////////////////////
370
371 XmlSemanticError::XmlSemanticError(
372 Uint32 lineNumber,
373 const String& message)
374 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
375 {
376
377 }
|
378 chuck 1.19
379
380 XmlSemanticError::XmlSemanticError(
381 Uint32 lineNumber,
382 MessageLoaderParms& msgParms)
383 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
384 {
385
386 }
387
|
388 mike 1.13
389 ////////////////////////////////////////////////////////////////////////////////
390 //
391 // XmlParser
392 //
393 ////////////////////////////////////////////////////////////////////////////////
394
395 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
396 _restoreChar('\0'), _foundRoot(false)
397 {
398
399 }
400
401 Boolean XmlParser::next(XmlEntry& entry)
402 {
403 if (!_putBackStack.isEmpty())
404 {
405 entry = _putBackStack.top();
406 _putBackStack.pop();
407 return true;
408 }
409 mike 1.13
410 // If a character was overwritten with a null-terminator the last
411 // time this routine was called, then put back that character. Before
412 // exiting of course, restore the null-terminator.
413
414 char* nullTerminator = 0;
415
416 if (_restoreChar && !*_current)
417 {
418 nullTerminator = _current;
419 *_current = _restoreChar;
420 _restoreChar = '\0';
421 }
422
423 // Skip over any whitespace:
424
425 _skipWhitespace(_current);
426
427 if (!*_current)
428 {
429 if (nullTerminator)
430 mike 1.13 *nullTerminator = '\0';
431
432 if (!_stack.isEmpty())
433 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
434
435 return false;
436 }
437
438 // Either a "<...>" or content begins next:
439
440 if (*_current == '<')
441 {
442 _current++;
443 _getElement(_current, entry);
444
445 if (nullTerminator)
446 *nullTerminator = '\0';
447
448 if (entry.type == XmlEntry::START_TAG)
449 {
450 if (_stack.isEmpty() && _foundRoot)
451 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
452
453 _foundRoot = true;
454 _stack.push((char*)entry.text);
455 }
456 else if (entry.type == XmlEntry::END_TAG)
457 {
458 if (_stack.isEmpty())
459 throw XmlException(XmlException::START_END_MISMATCH, _line);
460
461 if (strcmp(_stack.top(), entry.text) != 0)
462 throw XmlException(XmlException::START_END_MISMATCH, _line);
463
464 _stack.pop();
465 }
466
467 return true;
468 }
469 else
470 {
471 entry.type = XmlEntry::CONTENT;
472 mike 1.13 entry.text = _current;
473 _getContent(_current);
474 _restoreChar = *_current;
475 *_current = '\0';
476
477 if (nullTerminator)
478 *nullTerminator = '\0';
479
480 _substituteReferences((char*)entry.text);
481 _normalize((char*)entry.text);
482
483 return true;
484 }
485 }
486
487 void XmlParser::putBack(XmlEntry& entry)
488 {
489 _putBackStack.push(entry);
490 }
491
492 XmlParser::~XmlParser()
493 mike 1.13 {
494 // Nothing to do!
495 }
496
497 void XmlParser::_skipWhitespace(char*& p)
498 {
|
499 chuck 1.26 while (*p && _isspace(*p))
|
500 mike 1.13 {
501 if (*p == '\n')
502 _line++;
503
504 p++;
505 }
506 }
507
508 Boolean XmlParser::_getElementName(char*& p)
509 {
|
510 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
511 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
512 (*p == '_')))
|
513 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
514 kumpf 1.24 p++;
|
515 mike 1.13
|
516 david 1.22 while ((*p) &&
517 (((*p >= 'A') && (*p <= 'Z')) ||
518 ((*p >= 'a') && (*p <= 'z')) ||
|
519 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
520 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
521 mike 1.13 p++;
522
523 // The next character must be a space:
524
|
525 chuck 1.26 if (_isspace(*p))
|
526 mike 1.13 {
527 *p++ = '\0';
528 _skipWhitespace(p);
529 }
530
531 if (*p == '>')
532 {
533 *p++ = '\0';
534 return true;
535 }
536
537 return false;
538 }
539
540 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
541 {
542 openCloseElement = false;
543
|
544 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
545 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
546 (*p == '_')))
|
547 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
548 kumpf 1.24 p++;
|
549 mike 1.13
|
550 david 1.22 while ((*p) &&
551 (((*p >= 'A') && (*p <= 'Z')) ||
552 ((*p >= 'a') && (*p <= 'z')) ||
|
553 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
554 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
555 mike 1.13 p++;
556
557 // The next character must be a space:
558
|
559 chuck 1.26 if (_isspace(*p))
|
560 mike 1.13 {
561 *p++ = '\0';
562 _skipWhitespace(p);
563 }
564
565 if (*p == '>')
566 {
567 *p++ = '\0';
568 return true;
569 }
570
571 if (p[0] == '/' && p[1] == '>')
572 {
573 openCloseElement = true;
574 *p = '\0';
575 p += 2;
576 return true;
577 }
578
579 return false;
580 }
581 mike 1.13
582 void XmlParser::_getAttributeNameAndEqual(char*& p)
583 {
|
584 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
585 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
586 (*p == '_')))
|
587 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
588 kumpf 1.24 p++;
|
589 mike 1.13
|
590 david 1.22 while ((*p) &&
591 (((*p >= 'A') && (*p <= 'Z')) ||
592 ((*p >= 'a') && (*p <= 'z')) ||
|
593 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
594 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
595 mike 1.13 p++;
596
597 char* term = p;
598
599 _skipWhitespace(p);
600
601 if (*p != '=')
602 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
603
604 p++;
605
606 _skipWhitespace(p);
607
608 *term = '\0';
609 }
610
611 void XmlParser::_getAttributeValue(char*& p)
612 {
613 // ATTN-B: handle values contained in semiquotes:
614
615 if (*p != '"' && *p != '\'')
616 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
617
618 char startChar = *p++;
619
620 while (*p && *p != startChar)
621 p++;
622
623 if (*p != startChar)
624 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
625
626 *p++ = '\0';
627 }
628
629 void XmlParser::_getComment(char*& p)
630 {
631 // Now p points to first non-whitespace character beyond "<--" sequence:
632
633 for (; *p; p++)
634 {
635 if (p[0] == '-' && p[1] == '-')
636 {
637 mike 1.13 if (p[2] != '>')
638 {
639 throw XmlException(
640 XmlException::MINUS_MINUS_IN_COMMENT, _line);
641 }
642
643 // Find end of comment (excluding whitespace):
644
645 *p = '\0';
646 p += 3;
647 return;
648 }
649 }
650
651 // If it got this far, then the comment is unterminated:
652
653 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
654 }
655
656 void XmlParser::_getCData(char*& p)
657 {
658 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
659
660 for (; *p; p++)
661 {
662 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
663 {
664 *p = '\0';
665 p += 3;
666 return;
667 }
668 else if (*p == '\n')
669 _line++;
670 }
671
672 // If it got this far, then the comment is unterminated:
673
674 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
675 }
676
677 void XmlParser::_getDocType(char*& p)
678 {
679 mike 1.13 // Just ignore the DOCTYPE command for now:
680
681 for (; *p && *p != '>'; p++)
682 {
683 if (*p == '\n')
684 _line++;
685 }
686
687 if (*p != '>')
688 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
689
690 p++;
691 }
692
693 void XmlParser::_getContent(char*& p)
694 {
695 while (*p && *p != '<')
696 {
697 if (*p == '\n')
698 _line++;
699
700 mike 1.13 p++;
701 }
702 }
703
704 void XmlParser::_substituteReferences(char* text)
705 {
706 Uint32 rem = strlen(text);
707
708 for (char* p = text; *p; p++, rem--)
709 {
710 if (*p == '&')
711 {
|
712 kumpf 1.18 // Process character or entity reference
|
713 mike 1.13
|
714 kumpf 1.18 Uint16 referenceChar = 0;
715 Uint32 referenceLength = 0;
716 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
717
718 if (*(p+1) == '#')
719 {
720 // Found a character (numeric) reference
721 // Determine whether it is decimal or hex
722 if (*(p+2) == 'x')
723 {
724 // Decode a hexadecimal character reference
725 char* q = p+3;
726
727 // At most four digits are allowed, plus trailing ';'
728 Uint32 numDigits;
729 for (numDigits = 0; numDigits < 5; numDigits++, q++)
730 {
731 if (isdigit(*q))
732 {
733 referenceChar = (referenceChar << 4);
734 referenceChar += (*q - '0');
735 kumpf 1.18 }
736 else if ((*q >= 'A') && (*q <= 'F'))
737 {
738 referenceChar = (referenceChar << 4);
739 referenceChar += (*q - 'A' + 10);
740 }
741 else if ((*q >= 'a') && (*q <= 'f'))
742 {
743 referenceChar = (referenceChar << 4);
744 referenceChar += (*q - 'a' + 10);
745 }
746 else if (*q == ';')
747 {
748 break;
749 }
750 else
751 {
752 throw XmlException(code, _line);
753 }
754 }
755
756 kumpf 1.18 // Hex number must be 1 - 4 digits
757 if ((numDigits == 0) || (numDigits > 4))
758 {
759 throw XmlException(code, _line);
760 }
761
762 // ATTN: Currently do not support 16-bit characters
763 if (referenceChar > 0xff)
764 {
765 // ATTN: Is there a good way to say "unsupported"?
766 throw XmlException(code, _line);
767 }
768
769 referenceLength = numDigits + 4;
770 }
771 else
772 {
773 // Decode a decimal character reference
774 Uint32 newChar = 0;
775 char* q = p+2;
776
777 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
778 Uint32 numDigits;
779 for (numDigits = 0; numDigits < 6; numDigits++, q++)
780 {
781 if (isdigit(*q))
782 {
783 newChar = (newChar * 10);
784 newChar += (*q - '0');
785 }
786 else if (*q == ';')
787 {
788 break;
789 }
790 else
791 {
792 throw XmlException(code, _line);
793 }
794 }
795
796 // Decimal number must be 1 - 5 digits and fit in 16 bits
797 if ((numDigits == 0) || (numDigits > 5) ||
798 kumpf 1.18 (newChar > 0xffff))
799 {
800 throw XmlException(code, _line);
801 }
802
803 // ATTN: Currently do not support 16-bit characters
804 if (newChar > 0xff)
805 {
806 // ATTN: Is there a good way to say "unsupported"?
807 throw XmlException(code, _line);
808 }
809
810 referenceChar = Uint16(newChar);
811 referenceLength = numDigits + 3;
812 }
813 }
814 else
815 {
816 // Check for entity reference
817 // ATTN: Inefficient if many entity references are supported
818 Uint32 i;
819 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
820 {
821 Uint32 length = _references[i].length;
822 const char* match = _references[i].match;
823
824 if (strncmp(p, _references[i].match, length) == 0)
825 {
826 referenceChar = _references[i].replacement;
827 referenceLength = length;
828 break;
829 }
830 }
831
832 if (i == _REFERENCES_SIZE)
833 {
834 // Didn't recognize the entity reference
835 // ATTN: Is there a good way to say "unsupported"?
836 throw XmlException(code, _line);
837 }
838 }
839
840 kumpf 1.18 // Replace the reference with the correct character
841 *p = (char)referenceChar;
842 char* q = p + referenceLength;
843 rem = rem - referenceLength + 1;
844 memmove(p + 1, q, rem);
|
845 mike 1.13 }
846 }
847 }
848
849 static const char _EMPTY_STRING[] = "";
850
851 void XmlParser::_getElement(char*& p, XmlEntry& entry)
852 {
853 entry.attributeCount = 0;
854
855 //--------------------------------------------------------------------------
856 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
857 //--------------------------------------------------------------------------
858
859 if (*p == '?')
860 {
861 entry.type = XmlEntry::XML_DECLARATION;
862 entry.text = ++p;
863
864 Boolean openCloseElement = false;
865
866 mike 1.13 if (_getElementName(p))
867 return;
868 }
869 else if (*p == '!')
870 {
871 p++;
872
873 // Expect a comment or CDATA:
874
875 if (p[0] == '-' && p[1] == '-')
876 {
877 p += 2;
878 entry.type = XmlEntry::COMMENT;
879 entry.text = p;
880 _getComment(p);
881 return;
882 }
883 else if (memcmp(p, "[CDATA[", 7) == 0)
884 {
885 p += 7;
886 entry.type = XmlEntry::CDATA;
887 mike 1.13 entry.text = p;
888 _getCData(p);
889 return;
890 }
891 else if (memcmp(p, "DOCTYPE", 7) == 0)
892 {
893 entry.type = XmlEntry::DOCTYPE;
894 entry.text = _EMPTY_STRING;
895 _getDocType(p);
896 return;
897 }
898 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
899 }
900 else if (*p == '/')
901 {
902 entry.type = XmlEntry::END_TAG;
903 entry.text = ++p;
904
905 if (!_getElementName(p))
906 throw(XmlException(XmlException::BAD_END_TAG, _line));
907
908 mike 1.13 return;
909 }
|
910 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
911 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
912 (*p == '_')))
|
913 mike 1.13 {
914 entry.type = XmlEntry::START_TAG;
915 entry.text = p;
916
917 Boolean openCloseElement = false;
918
919 if (_getOpenElementName(p, openCloseElement))
920 {
921 if (openCloseElement)
922 entry.type = XmlEntry::EMPTY_TAG;
923 return;
924 }
925 }
926 else
927 throw XmlException(XmlException::BAD_START_TAG, _line);
928
929 //--------------------------------------------------------------------------
930 // Grab all the attributes:
931 //--------------------------------------------------------------------------
932
933 for (;;)
934 mike 1.13 {
935 if (entry.type == XmlEntry::XML_DECLARATION)
936 {
937 if (p[0] == '?' && p[1] == '>')
938 {
939 p += 2;
940 return;
941 }
942 }
943 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
944 {
945 entry.type = XmlEntry::EMPTY_TAG;
946 p += 2;
947 return;
948 }
949 else if (*p == '>')
950 {
951 p++;
952 return;
953 }
954
955 mike 1.13 XmlAttribute attr;
956 attr.name = p;
957 _getAttributeNameAndEqual(p);
958
959 if (*p != '"' && *p != '\'')
960 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
961
962 attr.value = p + 1;
963 _getAttributeValue(p);
964
965 if (entry.type == XmlEntry::XML_DECLARATION)
966 {
967 // The next thing must a space or a "?>":
968
|
969 chuck 1.26 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
|
970 mike 1.13 {
971 throw XmlException(
972 XmlException::BAD_ATTRIBUTE_VALUE, _line);
973 }
974 }
|
975 chuck 1.26 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
|
976 mike 1.13 {
977 // The next thing must be a space or a '>':
978
979 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
980 }
981
982 _skipWhitespace(p);
983
984 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
985 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
986
987 _substituteReferences((char*)attr.value);
988 entry.attributes[entry.attributeCount++] = attr;
989 }
990 }
991
// Printable names of the entry types, used by XmlEntry::print(), which
// indexes this table with the entry's type value.
// NOTE(review): the order presumably mirrors the enumerators of the
// XmlEntry type enumeration - confirm against XmlParser.h before adding
// or reordering entries.
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1003
1004 void XmlEntry::print() const
1005 {
1006 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1007
1008 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1009
1010 if (needQuotes)
1011 PEGASUS_STD(cout) << "\"";
1012
1013 _printValue(text);
1014
1015 if (needQuotes)
1016 PEGASUS_STD(cout) << "\"";
1017
1018 mike 1.13 PEGASUS_STD(cout) << '\n';
1019
1020 for (Uint32 i = 0; i < attributeCount; i++)
1021 {
1022 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1023 _printValue(attributes[i].value);
1024 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1025 }
1026 }
1027
1028 const XmlAttribute* XmlEntry::findAttribute(
1029 const char* name) const
1030 {
1031 for (Uint32 i = 0; i < attributeCount; i++)
1032 {
1033 if (strcmp(attributes[i].name, name) == 0)
1034 return &attributes[i];
1035 }
1036
1037 return 0;
1038 }
1039 mike 1.13
1040 // Find first non-whitespace character (set first) and last non-whitespace
1041 // character (set last one past this). For example, consider this string:
1042 //
1043 // " 87 "
1044 //
1045 // The first pointer would point to '8' and the last pointer woudl point one
1046 // beyond '7'.
1047
1048 static void _findEnds(
1049 const char* str,
1050 const char*& first,
1051 const char*& last)
1052 {
1053 first = str;
1054
|
1055 chuck 1.26 while (_isspace(*first))
|
1056 mike 1.13 first++;
1057
1058 if (!*first)
1059 {
1060 last = first;
1061 return;
1062 }
1063
1064 last = first + strlen(first);
1065
|
1066 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1067 mike 1.13 last--;
1068 }
1069
1070 Boolean XmlEntry::getAttributeValue(
1071 const char* name,
1072 Uint32& value) const
1073 {
1074 const XmlAttribute* attr = findAttribute(name);
1075
1076 if (!attr)
1077 return false;
1078
1079 const char* first;
1080 const char* last;
1081 _findEnds(attr->value, first, last);
1082
1083 char* end = 0;
1084 long tmp = strtol(first, &end, 10);
1085
1086 if (!end || end != last)
1087 return false;
1088 mike 1.13
1089 value = Uint32(tmp);
1090 return true;
1091 }
1092
1093 Boolean XmlEntry::getAttributeValue(
1094 const char* name,
1095 Real32& value) const
1096 {
1097 const XmlAttribute* attr = findAttribute(name);
1098
1099 if (!attr)
1100 return false;
1101
1102 const char* first;
1103 const char* last;
1104 _findEnds(attr->value, first, last);
1105
1106 char* end = 0;
1107 double tmp = strtod(first, &end);
1108
1109 mike 1.13 if (!end || end != last)
1110 return false;
1111
1112 value = Uint32(tmp);
1113 return true;
1114 }
1115
1116 Boolean XmlEntry::getAttributeValue(
1117 const char* name,
1118 const char*& value) const
1119 {
1120 const XmlAttribute* attr = findAttribute(name);
1121
1122 if (!attr)
1123 return false;
1124
1125 value = attr->value;
1126 return true;
1127 }
1128
1129 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1130 mike 1.13 {
1131 const char* tmp;
1132
1133 if (!getAttributeValue(name, tmp))
1134 return false;
1135
|
1136 chuck 1.28 value = String(tmp);
|
1137 mike 1.13 return true;
1138 }
1139
1140 void XmlAppendCString(Array<Sint8>& out, const char* str)
1141 {
1142 out.append(str, strlen(str));
1143 }
1144
1145 PEGASUS_NAMESPACE_END
|