1 karl 1.29 //%2004////////////////////////////////////////////////////////////////////////
|
2 mike 1.13 //
|
3 karl 1.29 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.27 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.29 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 mike 1.13 //
10 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
11 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
12 // deal in the Software without restriction, including without limitation the
13 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
14 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
15 // furnished to do so, subject to the following conditions:
16 //
|
17 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
18 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
19 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
20 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
23 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 //
26 //==============================================================================
27 //
28 // Author: Mike Brasher (mbrasher@bmc.com)
29 //
30 // Modified By:
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
34 ////////////////////////////////////////////////////////////////////////////////
35 //
36 // XmlParser
37 //
38 // This file contains a simple non-validating XML parser. Here are
// several rules for well-formed XML:
40 //
41 // 1. Documents must begin with an XML declaration:
42 //
43 // <?xml version="1.0" standalone="yes"?>
44 mike 1.13 //
45 // 2. Comments have the form:
46 //
47 // <!-- blah blah blah -->
48 //
49 // 3. The following entity references are supported:
50 //
//         &amp;  - ampersand
//         &lt;   - less-than
//         &gt;   - greater-than
//         &quot; - full quote
//         &apos; - apostrophe
56 //
|
57 kumpf 1.18 // as well as character (numeric) references:
58
//         &#49;  - decimal reference for character '1'
//         &#x31; - hexadecimal reference for character '1'
61 //
|
62 mike 1.13 // 4. Element names and attribute names take the following form:
63 //
//         [A-Za-z_][A-Za-z_0-9-.:]*
65 //
66 // 5. Arbitrary data (CDATA) can be enclosed like this:
67 //
68 // <![CDATA[
69 // ...
70 // ]]>
71 //
72 // 6. Element names and attributes names are case-sensitive.
73 //
74 // 7. XmlAttribute values must be delimited by full or half quotes.
75 // XmlAttribute values must be delimited.
76 //
77 // 8. <!DOCTYPE...>
78 //
79 // TODO:
80 //
|
81 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
82 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
83 // rules rather than references to files).
84 //
85 // Remove newlines from string literals:
86 //
87 // Example: <xyz x="hello
88 // world">
89 //
90 ////////////////////////////////////////////////////////////////////////////////
91
|
92 sage 1.14 #include <Pegasus/Common/Config.h>
|
93 mike 1.13 #include <cctype>
94 #include <cstdio>
95 #include <cstdlib>
96 #include <cstring>
97 #include "XmlParser.h"
98 #include "Logger.h"
|
99 chuck 1.19 #include "ExceptionRep.h"
|
100 mike 1.13
101 PEGASUS_NAMESPACE_BEGIN
102
103 #define PEGASUS_ARRAY_T XmlEntry
104 # include "ArrayImpl.h"
105 #undef PEGASUS_ARRAY_T
106
107
108 ////////////////////////////////////////////////////////////////////////////////
109 //
110 // Static helper functions
111 //
112 ////////////////////////////////////////////////////////////////////////////////
113
114 static void _printValue(const char* p)
115 {
116 for (; *p; p++)
117 {
118 if (*p == '\n')
119 PEGASUS_STD(cout) << "\\n";
120 else if (*p == '\r')
121 mike 1.13 PEGASUS_STD(cout) << "\\r";
122 else if (*p == '\t')
123 PEGASUS_STD(cout) << "\\t";
124 else
125 PEGASUS_STD(cout) << *p;
126 }
127 }
128
129 struct EntityReference
130 {
131 const char* match;
132 Uint32 length;
133 char replacement;
134 };
135
|
136 kumpf 1.18 // ATTN: Add support for more entity references
|
137 mike 1.13 static EntityReference _references[] =
138 {
139 { "&", 5, '&' },
140 { "<", 4, '<' },
141 { ">", 4, '>' },
142 { """, 6, '"' },
143 { "'", 6, '\'' }
144 };
145
|
146 chuck 1.26
// Implements a check for a whitespace character, without calling
// isspace(). The isspace() function is locale-sensitive, and incorrectly
// flags some chars above 0x7f as whitespace. This causes the XmlParser to
// incorrectly parse UTF-8 data.
//
// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)
// defines white space as:
//     S ::= (#x20 | #x9 | #xD | #xA)+
//
// Returns 1 if c is one of those four characters, 0 otherwise.
static int _isspace(char c)
{
    if (c == ' ' || c == '\r' || c == '\t' || c == '\n')
        return 1;
    return 0;
}
161
162
|
163 mike 1.13 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
164
165 // Remove all redundant spaces from the given string:
166
167 static void _normalize(char* text)
168 {
169 Uint32 length = strlen(text);
170 char* p = text;
171 char* end = p + length;
172
173 // Remove leading spaces:
174
|
175 chuck 1.26 while (_isspace(*p))
176 p++;
|
177 mike 1.13
178 if (p != text)
179 memmove(text, p, end - p + 1);
180
181 p = text;
182
183 // Look for sequences of more than one space and remove all but one.
184
185 for (;;)
186 {
187 // Advance to the next space:
188
|
189 chuck 1.26 while (*p && !_isspace(*p))
|
190 mike 1.13 p++;
191
192 if (!*p)
193 break;
194
195 // Advance to the next non-space:
196
197 char* q = p++;
198
|
199 chuck 1.26 while (_isspace(*p))
|
200 mike 1.13 p++;
201
202 // Discard trailing spaces (if we are at the end):
203
204 if (!*p)
205 {
206 *q = '\0';
207 break;
208 }
209
210 // Remove the redundant spaces:
211
212 Uint32 n = p - q;
213
214 if (n > 1)
215 {
216 *q++ = ' ';
217 memmove(q, p, end - p + 1);
218 p = q;
219 }
220 }
221 mike 1.13 }
222
223 ////////////////////////////////////////////////////////////////////////////////
224 //
225 // XmlException
226 //
227 ////////////////////////////////////////////////////////////////////////////////
228
// Default (English) message text for each XmlException code. The entry for
// a given code is found at index (code - 1); the order must match _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
249
|
// Message-bundle key for each XmlException code. The entry for a given code
// is found at index (code - 1); the order must match _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
270
|
271 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
272 chuck 1.19 /*
|
273 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
274 {
275 String result = _xmlMessages[Uint32(code) - 1];
276
277 char buffer[32];
278 sprintf(buffer, "%d", line);
279 result.append(": on line ");
280 result.append(buffer);
281
282 if (message.size())
283 {
284 result.append(": ");
285 result.append(message);
286 }
287
288 return result;
289 }
|
290 chuck 1.19 */
291
292 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
293 {
294 String dftMsg = _xmlMessages[Uint32(code) - 1];
295 String key = _xmlKeys[Uint32(code) - 1];
296 String msg = message;
297
298 dftMsg.append(": on line $0");
299 if (message.size())
300 {
|
301 humberto 1.20 msg = ": " + msg;
|
302 chuck 1.19 dftMsg.append("$1");
303 }
304
305 return MessageLoaderParms(key, dftMsg, line ,msg);
306 }
307
308 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
309 {
310 String dftMsg = _xmlMessages[Uint32(code) - 1];
311 String key = _xmlKeys[Uint32(code) - 1];
312
313 dftMsg.append(": on line $0");
314
315 return MessageLoaderParms(key, dftMsg, line);
316 }
317
|
318 mike 1.13
319 XmlException::XmlException(
320 XmlException::Code code,
321 Uint32 lineNumber,
322 const String& message)
323 : Exception(_formMessage(code, lineNumber, message))
324 {
325
326 }
327
|
328 chuck 1.19
329 XmlException::XmlException(
330 XmlException::Code code,
331 Uint32 lineNumber,
332 MessageLoaderParms& msgParms)
333 : Exception(_formPartialMessage(code, lineNumber))
334 {
|
335 humberto 1.21 if (msgParms.default_msg.size())
336 {
337 msgParms.default_msg = ": " + msgParms.default_msg;
338 }
|
339 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
340 }
341
342
|
343 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
344 //
345 // XmlValidationError
346 //
347 ////////////////////////////////////////////////////////////////////////////////
348
349 XmlValidationError::XmlValidationError(
350 Uint32 lineNumber,
351 const String& message)
352 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
353 {
354
355 }
356
|
357 chuck 1.19
358 XmlValidationError::XmlValidationError(
359 Uint32 lineNumber,
360 MessageLoaderParms& msgParms)
361 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
362 {
363
364 }
365
366
|
367 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
368 //
369 // XmlSemanticError
370 //
371 ////////////////////////////////////////////////////////////////////////////////
372
373 XmlSemanticError::XmlSemanticError(
374 Uint32 lineNumber,
375 const String& message)
376 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
377 {
378
379 }
|
380 chuck 1.19
381
382 XmlSemanticError::XmlSemanticError(
383 Uint32 lineNumber,
384 MessageLoaderParms& msgParms)
385 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
386 {
387
388 }
389
|
390 mike 1.13
391 ////////////////////////////////////////////////////////////////////////////////
392 //
393 // XmlParser
394 //
395 ////////////////////////////////////////////////////////////////////////////////
396
397 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
398 _restoreChar('\0'), _foundRoot(false)
399 {
400
401 }
402
403 Boolean XmlParser::next(XmlEntry& entry)
404 {
405 if (!_putBackStack.isEmpty())
406 {
407 entry = _putBackStack.top();
408 _putBackStack.pop();
409 return true;
410 }
411 mike 1.13
412 // If a character was overwritten with a null-terminator the last
413 // time this routine was called, then put back that character. Before
414 // exiting of course, restore the null-terminator.
415
416 char* nullTerminator = 0;
417
418 if (_restoreChar && !*_current)
419 {
420 nullTerminator = _current;
421 *_current = _restoreChar;
422 _restoreChar = '\0';
423 }
424
425 // Skip over any whitespace:
426
427 _skipWhitespace(_current);
428
429 if (!*_current)
430 {
431 if (nullTerminator)
432 mike 1.13 *nullTerminator = '\0';
433
434 if (!_stack.isEmpty())
435 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
436
437 return false;
438 }
439
440 // Either a "<...>" or content begins next:
441
442 if (*_current == '<')
443 {
444 _current++;
445 _getElement(_current, entry);
446
447 if (nullTerminator)
448 *nullTerminator = '\0';
449
450 if (entry.type == XmlEntry::START_TAG)
451 {
452 if (_stack.isEmpty() && _foundRoot)
453 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
454
455 _foundRoot = true;
456 _stack.push((char*)entry.text);
457 }
458 else if (entry.type == XmlEntry::END_TAG)
459 {
460 if (_stack.isEmpty())
461 throw XmlException(XmlException::START_END_MISMATCH, _line);
462
463 if (strcmp(_stack.top(), entry.text) != 0)
464 throw XmlException(XmlException::START_END_MISMATCH, _line);
465
466 _stack.pop();
467 }
468
469 return true;
470 }
471 else
472 {
473 entry.type = XmlEntry::CONTENT;
474 mike 1.13 entry.text = _current;
475 _getContent(_current);
476 _restoreChar = *_current;
477 *_current = '\0';
478
479 if (nullTerminator)
480 *nullTerminator = '\0';
481
482 _substituteReferences((char*)entry.text);
483 _normalize((char*)entry.text);
484
485 return true;
486 }
487 }
488
489 void XmlParser::putBack(XmlEntry& entry)
490 {
491 _putBackStack.push(entry);
492 }
493
494 XmlParser::~XmlParser()
495 mike 1.13 {
496 // Nothing to do!
497 }
498
499 void XmlParser::_skipWhitespace(char*& p)
500 {
|
501 chuck 1.26 while (*p && _isspace(*p))
|
502 mike 1.13 {
503 if (*p == '\n')
504 _line++;
505
506 p++;
507 }
508 }
509
510 Boolean XmlParser::_getElementName(char*& p)
511 {
|
512 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
513 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
514 (*p == '_')))
|
515 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
516 kumpf 1.24 p++;
|
517 mike 1.13
|
518 david 1.22 while ((*p) &&
519 (((*p >= 'A') && (*p <= 'Z')) ||
520 ((*p >= 'a') && (*p <= 'z')) ||
|
521 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
522 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
523 mike 1.13 p++;
524
525 // The next character must be a space:
526
|
527 chuck 1.26 if (_isspace(*p))
|
528 mike 1.13 {
529 *p++ = '\0';
530 _skipWhitespace(p);
531 }
532
533 if (*p == '>')
534 {
535 *p++ = '\0';
536 return true;
537 }
538
539 return false;
540 }
541
542 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
543 {
544 openCloseElement = false;
545
|
546 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
547 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
548 (*p == '_')))
|
549 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
550 kumpf 1.24 p++;
|
551 mike 1.13
|
552 david 1.22 while ((*p) &&
553 (((*p >= 'A') && (*p <= 'Z')) ||
554 ((*p >= 'a') && (*p <= 'z')) ||
|
555 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
556 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
557 mike 1.13 p++;
558
559 // The next character must be a space:
560
|
561 chuck 1.26 if (_isspace(*p))
|
562 mike 1.13 {
563 *p++ = '\0';
564 _skipWhitespace(p);
565 }
566
567 if (*p == '>')
568 {
569 *p++ = '\0';
570 return true;
571 }
572
573 if (p[0] == '/' && p[1] == '>')
574 {
575 openCloseElement = true;
576 *p = '\0';
577 p += 2;
578 return true;
579 }
580
581 return false;
582 }
583 mike 1.13
584 void XmlParser::_getAttributeNameAndEqual(char*& p)
585 {
|
586 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
587 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
588 (*p == '_')))
|
589 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
590 kumpf 1.24 p++;
|
591 mike 1.13
|
592 david 1.22 while ((*p) &&
593 (((*p >= 'A') && (*p <= 'Z')) ||
594 ((*p >= 'a') && (*p <= 'z')) ||
|
595 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
596 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
597 mike 1.13 p++;
598
599 char* term = p;
600
601 _skipWhitespace(p);
602
603 if (*p != '=')
604 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
605
606 p++;
607
608 _skipWhitespace(p);
609
610 *term = '\0';
611 }
612
613 void XmlParser::_getAttributeValue(char*& p)
614 {
615 // ATTN-B: handle values contained in semiquotes:
616
617 if (*p != '"' && *p != '\'')
618 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
619
620 char startChar = *p++;
621
622 while (*p && *p != startChar)
623 p++;
624
625 if (*p != startChar)
626 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
627
628 *p++ = '\0';
629 }
630
631 void XmlParser::_getComment(char*& p)
632 {
633 // Now p points to first non-whitespace character beyond "<--" sequence:
634
635 for (; *p; p++)
636 {
637 if (p[0] == '-' && p[1] == '-')
638 {
639 mike 1.13 if (p[2] != '>')
640 {
641 throw XmlException(
642 XmlException::MINUS_MINUS_IN_COMMENT, _line);
643 }
644
645 // Find end of comment (excluding whitespace):
646
647 *p = '\0';
648 p += 3;
649 return;
650 }
651 }
652
653 // If it got this far, then the comment is unterminated:
654
655 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
656 }
657
658 void XmlParser::_getCData(char*& p)
659 {
660 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
661
662 for (; *p; p++)
663 {
664 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
665 {
666 *p = '\0';
667 p += 3;
668 return;
669 }
670 else if (*p == '\n')
671 _line++;
672 }
673
674 // If it got this far, then the comment is unterminated:
675
676 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
677 }
678
679 void XmlParser::_getDocType(char*& p)
680 {
681 mike 1.13 // Just ignore the DOCTYPE command for now:
682
683 for (; *p && *p != '>'; p++)
684 {
685 if (*p == '\n')
686 _line++;
687 }
688
689 if (*p != '>')
690 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
691
692 p++;
693 }
694
695 void XmlParser::_getContent(char*& p)
696 {
697 while (*p && *p != '<')
698 {
699 if (*p == '\n')
700 _line++;
701
702 mike 1.13 p++;
703 }
704 }
705
706 void XmlParser::_substituteReferences(char* text)
707 {
708 Uint32 rem = strlen(text);
709
710 for (char* p = text; *p; p++, rem--)
711 {
712 if (*p == '&')
713 {
|
714 kumpf 1.18 // Process character or entity reference
|
715 mike 1.13
|
716 kumpf 1.18 Uint16 referenceChar = 0;
717 Uint32 referenceLength = 0;
718 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
719
720 if (*(p+1) == '#')
721 {
722 // Found a character (numeric) reference
723 // Determine whether it is decimal or hex
724 if (*(p+2) == 'x')
725 {
726 // Decode a hexadecimal character reference
727 char* q = p+3;
728
729 // At most four digits are allowed, plus trailing ';'
730 Uint32 numDigits;
731 for (numDigits = 0; numDigits < 5; numDigits++, q++)
732 {
733 if (isdigit(*q))
734 {
735 referenceChar = (referenceChar << 4);
736 referenceChar += (*q - '0');
737 kumpf 1.18 }
738 else if ((*q >= 'A') && (*q <= 'F'))
739 {
740 referenceChar = (referenceChar << 4);
741 referenceChar += (*q - 'A' + 10);
742 }
743 else if ((*q >= 'a') && (*q <= 'f'))
744 {
745 referenceChar = (referenceChar << 4);
746 referenceChar += (*q - 'a' + 10);
747 }
748 else if (*q == ';')
749 {
750 break;
751 }
752 else
753 {
754 throw XmlException(code, _line);
755 }
756 }
757
758 kumpf 1.18 // Hex number must be 1 - 4 digits
759 if ((numDigits == 0) || (numDigits > 4))
760 {
761 throw XmlException(code, _line);
762 }
763
764 // ATTN: Currently do not support 16-bit characters
765 if (referenceChar > 0xff)
766 {
767 // ATTN: Is there a good way to say "unsupported"?
768 throw XmlException(code, _line);
769 }
770
771 referenceLength = numDigits + 4;
772 }
773 else
774 {
775 // Decode a decimal character reference
776 Uint32 newChar = 0;
777 char* q = p+2;
778
779 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
780 Uint32 numDigits;
781 for (numDigits = 0; numDigits < 6; numDigits++, q++)
782 {
783 if (isdigit(*q))
784 {
785 newChar = (newChar * 10);
786 newChar += (*q - '0');
787 }
788 else if (*q == ';')
789 {
790 break;
791 }
792 else
793 {
794 throw XmlException(code, _line);
795 }
796 }
797
798 // Decimal number must be 1 - 5 digits and fit in 16 bits
799 if ((numDigits == 0) || (numDigits > 5) ||
800 kumpf 1.18 (newChar > 0xffff))
801 {
802 throw XmlException(code, _line);
803 }
804
805 // ATTN: Currently do not support 16-bit characters
806 if (newChar > 0xff)
807 {
808 // ATTN: Is there a good way to say "unsupported"?
809 throw XmlException(code, _line);
810 }
811
812 referenceChar = Uint16(newChar);
813 referenceLength = numDigits + 3;
814 }
815 }
816 else
817 {
818 // Check for entity reference
819 // ATTN: Inefficient if many entity references are supported
820 Uint32 i;
821 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
822 {
823 Uint32 length = _references[i].length;
824 const char* match = _references[i].match;
825
826 if (strncmp(p, _references[i].match, length) == 0)
827 {
828 referenceChar = _references[i].replacement;
829 referenceLength = length;
830 break;
831 }
832 }
833
834 if (i == _REFERENCES_SIZE)
835 {
836 // Didn't recognize the entity reference
837 // ATTN: Is there a good way to say "unsupported"?
838 throw XmlException(code, _line);
839 }
840 }
841
842 kumpf 1.18 // Replace the reference with the correct character
843 *p = (char)referenceChar;
844 char* q = p + referenceLength;
845 rem = rem - referenceLength + 1;
846 memmove(p + 1, q, rem);
|
847 mike 1.13 }
848 }
849 }
850
// Shared empty string, used as the text of entries that carry none.
static const char _EMPTY_STRING[] = "";
852
853 void XmlParser::_getElement(char*& p, XmlEntry& entry)
854 {
855 entry.attributeCount = 0;
856
857 //--------------------------------------------------------------------------
858 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
859 //--------------------------------------------------------------------------
860
861 if (*p == '?')
862 {
863 entry.type = XmlEntry::XML_DECLARATION;
864 entry.text = ++p;
865
866 Boolean openCloseElement = false;
867
868 mike 1.13 if (_getElementName(p))
869 return;
870 }
871 else if (*p == '!')
872 {
873 p++;
874
875 // Expect a comment or CDATA:
876
877 if (p[0] == '-' && p[1] == '-')
878 {
879 p += 2;
880 entry.type = XmlEntry::COMMENT;
881 entry.text = p;
882 _getComment(p);
883 return;
884 }
885 else if (memcmp(p, "[CDATA[", 7) == 0)
886 {
887 p += 7;
888 entry.type = XmlEntry::CDATA;
889 mike 1.13 entry.text = p;
890 _getCData(p);
891 return;
892 }
893 else if (memcmp(p, "DOCTYPE", 7) == 0)
894 {
895 entry.type = XmlEntry::DOCTYPE;
896 entry.text = _EMPTY_STRING;
897 _getDocType(p);
898 return;
899 }
900 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
901 }
902 else if (*p == '/')
903 {
904 entry.type = XmlEntry::END_TAG;
905 entry.text = ++p;
906
907 if (!_getElementName(p))
908 throw(XmlException(XmlException::BAD_END_TAG, _line));
909
910 mike 1.13 return;
911 }
|
912 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
913 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
914 (*p == '_')))
|
915 mike 1.13 {
916 entry.type = XmlEntry::START_TAG;
917 entry.text = p;
918
919 Boolean openCloseElement = false;
920
921 if (_getOpenElementName(p, openCloseElement))
922 {
923 if (openCloseElement)
924 entry.type = XmlEntry::EMPTY_TAG;
925 return;
926 }
927 }
928 else
929 throw XmlException(XmlException::BAD_START_TAG, _line);
930
931 //--------------------------------------------------------------------------
932 // Grab all the attributes:
933 //--------------------------------------------------------------------------
934
935 for (;;)
936 mike 1.13 {
937 if (entry.type == XmlEntry::XML_DECLARATION)
938 {
939 if (p[0] == '?' && p[1] == '>')
940 {
941 p += 2;
942 return;
943 }
944 }
945 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
946 {
947 entry.type = XmlEntry::EMPTY_TAG;
948 p += 2;
949 return;
950 }
951 else if (*p == '>')
952 {
953 p++;
954 return;
955 }
956
957 mike 1.13 XmlAttribute attr;
958 attr.name = p;
959 _getAttributeNameAndEqual(p);
960
961 if (*p != '"' && *p != '\'')
962 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
963
964 attr.value = p + 1;
965 _getAttributeValue(p);
966
967 if (entry.type == XmlEntry::XML_DECLARATION)
968 {
969 // The next thing must a space or a "?>":
970
|
971 chuck 1.26 if (!(p[0] == '?' && p[1] == '>') && !_isspace(*p))
|
972 mike 1.13 {
973 throw XmlException(
974 XmlException::BAD_ATTRIBUTE_VALUE, _line);
975 }
976 }
|
977 chuck 1.26 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || _isspace(*p)))
|
978 mike 1.13 {
979 // The next thing must be a space or a '>':
980
981 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
982 }
983
984 _skipWhitespace(p);
985
986 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
987 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
988
989 _substituteReferences((char*)attr.value);
990 entry.attributes[entry.attributeCount++] = attr;
991 }
992 }
993
// Printable names of the entry types, indexed by XmlEntry type value in
// XmlEntry::print().
static const char* _typeStrings[] =
{
    "XML_DECLARATION",
    "START_TAG",
    "EMPTY_TAG",
    "END_TAG",
    "COMMENT",
    "CDATA",
    "DOCTYPE",
    "CONTENT"
};
1005
1006 void XmlEntry::print() const
1007 {
1008 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
1009
1010 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
1011
1012 if (needQuotes)
1013 PEGASUS_STD(cout) << "\"";
1014
1015 _printValue(text);
1016
1017 if (needQuotes)
1018 PEGASUS_STD(cout) << "\"";
1019
1020 mike 1.13 PEGASUS_STD(cout) << '\n';
1021
1022 for (Uint32 i = 0; i < attributeCount; i++)
1023 {
1024 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1025 _printValue(attributes[i].value);
1026 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1027 }
1028 }
1029
1030 const XmlAttribute* XmlEntry::findAttribute(
1031 const char* name) const
1032 {
1033 for (Uint32 i = 0; i < attributeCount; i++)
1034 {
1035 if (strcmp(attributes[i].name, name) == 0)
1036 return &attributes[i];
1037 }
1038
1039 return 0;
1040 }
1041 mike 1.13
1042 // Find first non-whitespace character (set first) and last non-whitespace
1043 // character (set last one past this). For example, consider this string:
1044 //
1045 // " 87 "
1046 //
1047 // The first pointer would point to '8' and the last pointer woudl point one
1048 // beyond '7'.
1049
1050 static void _findEnds(
1051 const char* str,
1052 const char*& first,
1053 const char*& last)
1054 {
1055 first = str;
1056
|
1057 chuck 1.26 while (_isspace(*first))
|
1058 mike 1.13 first++;
1059
1060 if (!*first)
1061 {
1062 last = first;
1063 return;
1064 }
1065
1066 last = first + strlen(first);
1067
|
1068 chuck 1.26 while (last != first && _isspace(last[-1]))
|
1069 mike 1.13 last--;
1070 }
1071
1072 Boolean XmlEntry::getAttributeValue(
1073 const char* name,
1074 Uint32& value) const
1075 {
1076 const XmlAttribute* attr = findAttribute(name);
1077
1078 if (!attr)
1079 return false;
1080
1081 const char* first;
1082 const char* last;
1083 _findEnds(attr->value, first, last);
1084
1085 char* end = 0;
1086 long tmp = strtol(first, &end, 10);
1087
1088 if (!end || end != last)
1089 return false;
1090 mike 1.13
1091 value = Uint32(tmp);
1092 return true;
1093 }
1094
1095 Boolean XmlEntry::getAttributeValue(
1096 const char* name,
1097 Real32& value) const
1098 {
1099 const XmlAttribute* attr = findAttribute(name);
1100
1101 if (!attr)
1102 return false;
1103
1104 const char* first;
1105 const char* last;
1106 _findEnds(attr->value, first, last);
1107
1108 char* end = 0;
1109 double tmp = strtod(first, &end);
1110
1111 mike 1.13 if (!end || end != last)
1112 return false;
1113
1114 value = Uint32(tmp);
1115 return true;
1116 }
1117
1118 Boolean XmlEntry::getAttributeValue(
1119 const char* name,
1120 const char*& value) const
1121 {
1122 const XmlAttribute* attr = findAttribute(name);
1123
1124 if (!attr)
1125 return false;
1126
1127 value = attr->value;
1128 return true;
1129 }
1130
1131 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1132 mike 1.13 {
1133 const char* tmp;
1134
1135 if (!getAttributeValue(name, tmp))
1136 return false;
1137
|
1138 chuck 1.28 value = String(tmp);
|
1139 mike 1.13 return true;
1140 }
1141
1142 void XmlAppendCString(Array<Sint8>& out, const char* str)
1143 {
1144 out.append(str, strlen(str));
1145 }
1146
1147 PEGASUS_NAMESPACE_END
|