1 mike 1.13 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.13 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
26 // Modified By:
27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30 ////////////////////////////////////////////////////////////////////////////////
31 //
32 // XmlParser
33 //
34 // This file contains a simple non-validating XML parser. Here are
35 // several rules for well-formed XML:
36 //
37 // 1. Documents must begin with an XML declaration:
38 //
39 // <?xml version="1.0" standalone="yes"?>
40 mike 1.13 //
41 // 2. Comments have the form:
42 //
43 // <!-- blah blah blah -->
44 //
45 // 3. The following entity references are supported:
46 //
47 // &amp; - ampersand
48 // &lt; - less-than
49 // &gt; - greater-than
50 // &quot; - full quote
51 // &apos; - apostrophe
52 //
|
53 kumpf 1.18 // as well as character (numeric) references:
54
55 // &#49; - decimal reference for character '1'
56 // &#x31; - hexadecimal reference for character '1'
57 //
|
58 mike 1.13 // 4. Element names and attribute names take the following form:
59 //
60 // [A-Za-z_][A-Za-z_0-9-.:]*
61 //
62 // 5. Arbitrary data (CDATA) can be enclosed like this:
63 //
64 // <![CDATA[
65 // ...
66 // ]]>
67 //
68 // 6. Element names and attributes names are case-sensitive.
69 //
70 // 7. Attribute values must be delimited by full or half quotes;
71 // undelimited values are rejected.
72 //
73 // 8. <!DOCTYPE...>
74 //
75 // TODO:
76 //
|
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
79 // rules rather than references to files).
80 //
81 // Remove newlines from string literals:
82 //
83 // Example: <xyz x="hello
84 // world">
85 //
86 ////////////////////////////////////////////////////////////////////////////////
87
|
88 sage 1.14 #include <Pegasus/Common/Config.h>
|
89 mike 1.13 #include <cctype>
90 #include <cstdio>
91 #include <cstdlib>
92 #include <cstring>
93 #include "XmlParser.h"
94 #include "Logger.h"
|
95 chuck 1.19 #include "ExceptionRep.h"
|
96 mike 1.13
97 PEGASUS_NAMESPACE_BEGIN
98
99 #define PEGASUS_ARRAY_T XmlEntry
100 # include "ArrayImpl.h"
101 #undef PEGASUS_ARRAY_T
102
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
114 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 mike 1.13 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
122 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
143
// Remove all redundant whitespace from the given string, in place:
//
//   - leading whitespace is removed;
//   - trailing whitespace is removed;
//   - every interior run of two or more whitespace characters is replaced
//     by a single ' ' (a lone interior whitespace character is kept as is).
//
// text must be a writable, null-terminated buffer. The result is never
// longer than the input.
static void _normalize(char* text)
{
    // Note: characters are converted to unsigned char before being passed
    // to isspace(); passing a negative char value (for example a byte of a
    // UTF-8 sequence) is undefined behavior.

    // Remove leading spaces:

    char* p = text;

    while (isspace(static_cast<unsigned char>(*p)))
        p++;

    if (p != text)
        memmove(text, p, strlen(p) + 1);

    p = text;

    // Look for sequences of more than one space and remove all but one.

    for (;;)
    {
        // Advance to the next space:

        while (*p && !isspace(static_cast<unsigned char>(*p)))
            p++;

        if (!*p)
            break;

        // Advance to the next non-space, remembering where the run began:

        char* q = p++;

        while (isspace(static_cast<unsigned char>(*p)))
            p++;

        // Discard trailing spaces (if we are at the end):

        if (!*p)
        {
            *q = '\0';
            break;
        }

        // Collapse the run to a single blank. The tail length is measured
        // from the current position so that exactly the remaining text and
        // its terminator are moved.

        if (p - q > 1)
        {
            *q++ = ' ';
            memmove(q, p, strlen(p) + 1);
            p = q;
        }
    }
}
201
202 ////////////////////////////////////////////////////////////////////////////////
203 //
204 // XmlException
205 //
206 ////////////////////////////////////////////////////////////////////////////////
207
// Default (English) message text for each XmlException code. The table is
// indexed with (code - 1) by _formMessage() and _formPartialMessage(), so
// the order of the entries must not change.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};

// Message keys passed to MessageLoaderParms, one per entry of _xmlMessages
// and in the same order.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
249
|
250 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
251 chuck 1.19 /*
|
252 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
253 {
254 String result = _xmlMessages[Uint32(code) - 1];
255
256 char buffer[32];
257 sprintf(buffer, "%d", line);
258 result.append(": on line ");
259 result.append(buffer);
260
261 if (message.size())
262 {
263 result.append(": ");
264 result.append(message);
265 }
266
267 return result;
268 }
|
269 chuck 1.19 */
270
271 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
272 {
273 String dftMsg = _xmlMessages[Uint32(code) - 1];
274 String key = _xmlKeys[Uint32(code) - 1];
275 String msg = message;
276
277 dftMsg.append(": on line $0");
278 if (message.size())
279 {
|
280 humberto 1.20 msg = ": " + msg;
|
281 chuck 1.19 dftMsg.append("$1");
282 }
283
284 return MessageLoaderParms(key, dftMsg, line ,msg);
285 }
286
287 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
288 {
289 String dftMsg = _xmlMessages[Uint32(code) - 1];
290 String key = _xmlKeys[Uint32(code) - 1];
291
292 dftMsg.append(": on line $0");
293
294 return MessageLoaderParms(key, dftMsg, line);
295 }
296
|
297 mike 1.13
298 XmlException::XmlException(
299 XmlException::Code code,
300 Uint32 lineNumber,
301 const String& message)
302 : Exception(_formMessage(code, lineNumber, message))
303 {
304
305 }
306
|
307 chuck 1.19
308 XmlException::XmlException(
309 XmlException::Code code,
310 Uint32 lineNumber,
311 MessageLoaderParms& msgParms)
312 : Exception(_formPartialMessage(code, lineNumber))
313 {
|
314 humberto 1.21 if (msgParms.default_msg.size())
315 {
316 msgParms.default_msg = ": " + msgParms.default_msg;
317 }
|
318 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
319 }
320
321
|
322 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
323 //
324 // XmlValidationError
325 //
326 ////////////////////////////////////////////////////////////////////////////////
327
328 XmlValidationError::XmlValidationError(
329 Uint32 lineNumber,
330 const String& message)
331 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
332 {
333
334 }
335
|
336 chuck 1.19
337 XmlValidationError::XmlValidationError(
338 Uint32 lineNumber,
339 MessageLoaderParms& msgParms)
340 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
341 {
342
343 }
344
345
|
346 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
347 //
348 // XmlSemanticError
349 //
350 ////////////////////////////////////////////////////////////////////////////////
351
352 XmlSemanticError::XmlSemanticError(
353 Uint32 lineNumber,
354 const String& message)
355 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
356 {
357
358 }
|
359 chuck 1.19
360
361 XmlSemanticError::XmlSemanticError(
362 Uint32 lineNumber,
363 MessageLoaderParms& msgParms)
364 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
365 {
366
367 }
368
|
369 mike 1.13
370 ////////////////////////////////////////////////////////////////////////////////
371 //
372 // XmlParser
373 //
374 ////////////////////////////////////////////////////////////////////////////////
375
376 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
377 _restoreChar('\0'), _foundRoot(false)
378 {
379
380 }
381
382 Boolean XmlParser::next(XmlEntry& entry)
383 {
384 if (!_putBackStack.isEmpty())
385 {
386 entry = _putBackStack.top();
387 _putBackStack.pop();
388 return true;
389 }
390 mike 1.13
391 // If a character was overwritten with a null-terminator the last
392 // time this routine was called, then put back that character. Before
393 // exiting of course, restore the null-terminator.
394
395 char* nullTerminator = 0;
396
397 if (_restoreChar && !*_current)
398 {
399 nullTerminator = _current;
400 *_current = _restoreChar;
401 _restoreChar = '\0';
402 }
403
404 // Skip over any whitespace:
405
406 _skipWhitespace(_current);
407
408 if (!*_current)
409 {
410 if (nullTerminator)
411 mike 1.13 *nullTerminator = '\0';
412
413 if (!_stack.isEmpty())
414 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
415
416 return false;
417 }
418
419 // Either a "<...>" or content begins next:
420
421 if (*_current == '<')
422 {
423 _current++;
424 _getElement(_current, entry);
425
426 if (nullTerminator)
427 *nullTerminator = '\0';
428
429 if (entry.type == XmlEntry::START_TAG)
430 {
431 if (_stack.isEmpty() && _foundRoot)
432 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
433
434 _foundRoot = true;
435 _stack.push((char*)entry.text);
436 }
437 else if (entry.type == XmlEntry::END_TAG)
438 {
439 if (_stack.isEmpty())
440 throw XmlException(XmlException::START_END_MISMATCH, _line);
441
442 if (strcmp(_stack.top(), entry.text) != 0)
443 throw XmlException(XmlException::START_END_MISMATCH, _line);
444
445 _stack.pop();
446 }
447
448 return true;
449 }
450 else
451 {
452 entry.type = XmlEntry::CONTENT;
453 mike 1.13 entry.text = _current;
454 _getContent(_current);
455 _restoreChar = *_current;
456 *_current = '\0';
457
458 if (nullTerminator)
459 *nullTerminator = '\0';
460
461 _substituteReferences((char*)entry.text);
462 _normalize((char*)entry.text);
463
464 return true;
465 }
466 }
467
468 void XmlParser::putBack(XmlEntry& entry)
469 {
470 _putBackStack.push(entry);
471 }
472
473 XmlParser::~XmlParser()
474 mike 1.13 {
475 // Nothing to do!
476 }
477
478 void XmlParser::_skipWhitespace(char*& p)
479 {
480 while (*p && isspace(*p))
481 {
482 if (*p == '\n')
483 _line++;
484
485 p++;
486 }
487 }
488
489 Boolean XmlParser::_getElementName(char*& p)
490 {
|
491 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
492 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
493 (*p == '_')))
|
494 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
495 kumpf 1.24 p++;
|
496 mike 1.13
|
497 david 1.22 while ((*p) &&
498 (((*p >= 'A') && (*p <= 'Z')) ||
499 ((*p >= 'a') && (*p <= 'z')) ||
|
500 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
501 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
502 mike 1.13 p++;
503
504 // The next character must be a space:
505
506 if (isspace(*p))
507 {
508 *p++ = '\0';
509 _skipWhitespace(p);
510 }
511
512 if (*p == '>')
513 {
514 *p++ = '\0';
515 return true;
516 }
517
518 return false;
519 }
520
521 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
522 {
523 mike 1.13 openCloseElement = false;
524
|
525 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
526 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
527 (*p == '_')))
|
528 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
529 kumpf 1.24 p++;
|
530 mike 1.13
|
531 david 1.22 while ((*p) &&
532 (((*p >= 'A') && (*p <= 'Z')) ||
533 ((*p >= 'a') && (*p <= 'z')) ||
|
534 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
535 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
536 mike 1.13 p++;
537
538 // The next character must be a space:
539
540 if (isspace(*p))
541 {
542 *p++ = '\0';
543 _skipWhitespace(p);
544 }
545
546 if (*p == '>')
547 {
548 *p++ = '\0';
549 return true;
550 }
551
552 if (p[0] == '/' && p[1] == '>')
553 {
554 openCloseElement = true;
555 *p = '\0';
556 p += 2;
557 mike 1.13 return true;
558 }
559
560 return false;
561 }
562
563 void XmlParser::_getAttributeNameAndEqual(char*& p)
564 {
|
565 david 1.25 if (!(((*p >= 'A') && (*p <= 'Z')) ||
|
566 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
567 (*p == '_')))
|
568 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
569 kumpf 1.24 p++;
|
570 mike 1.13
|
571 david 1.22 while ((*p) &&
572 (((*p >= 'A') && (*p <= 'Z')) ||
573 ((*p >= 'a') && (*p <= 'z')) ||
|
574 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
575 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
576 mike 1.13 p++;
577
578 char* term = p;
579
580 _skipWhitespace(p);
581
582 if (*p != '=')
583 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
584
585 p++;
586
587 _skipWhitespace(p);
588
589 *term = '\0';
590 }
591
592 void XmlParser::_getAttributeValue(char*& p)
593 {
594 // ATTN-B: handle values contained in semiquotes:
595
596 if (*p != '"' && *p != '\'')
597 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
598
599 char startChar = *p++;
600
601 while (*p && *p != startChar)
602 p++;
603
604 if (*p != startChar)
605 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
606
607 *p++ = '\0';
608 }
609
610 void XmlParser::_getComment(char*& p)
611 {
612 // Now p points to first non-whitespace character beyond "<--" sequence:
613
614 for (; *p; p++)
615 {
616 if (p[0] == '-' && p[1] == '-')
617 {
618 mike 1.13 if (p[2] != '>')
619 {
620 throw XmlException(
621 XmlException::MINUS_MINUS_IN_COMMENT, _line);
622 }
623
624 // Find end of comment (excluding whitespace):
625
626 *p = '\0';
627 p += 3;
628 return;
629 }
630 }
631
632 // If it got this far, then the comment is unterminated:
633
634 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
635 }
636
637 void XmlParser::_getCData(char*& p)
638 {
639 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
640
641 for (; *p; p++)
642 {
643 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
644 {
645 *p = '\0';
646 p += 3;
647 return;
648 }
649 else if (*p == '\n')
650 _line++;
651 }
652
653 // If it got this far, then the comment is unterminated:
654
655 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
656 }
657
658 void XmlParser::_getDocType(char*& p)
659 {
660 mike 1.13 // Just ignore the DOCTYPE command for now:
661
662 for (; *p && *p != '>'; p++)
663 {
664 if (*p == '\n')
665 _line++;
666 }
667
668 if (*p != '>')
669 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
670
671 p++;
672 }
673
674 void XmlParser::_getContent(char*& p)
675 {
676 while (*p && *p != '<')
677 {
678 if (*p == '\n')
679 _line++;
680
681 mike 1.13 p++;
682 }
683 }
684
685 void XmlParser::_substituteReferences(char* text)
686 {
687 Uint32 rem = strlen(text);
688
689 for (char* p = text; *p; p++, rem--)
690 {
691 if (*p == '&')
692 {
|
693 kumpf 1.18 // Process character or entity reference
|
694 mike 1.13
|
695 kumpf 1.18 Uint16 referenceChar = 0;
696 Uint32 referenceLength = 0;
697 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
698
699 if (*(p+1) == '#')
700 {
701 // Found a character (numeric) reference
702 // Determine whether it is decimal or hex
703 if (*(p+2) == 'x')
704 {
705 // Decode a hexadecimal character reference
706 char* q = p+3;
707
708 // At most four digits are allowed, plus trailing ';'
709 Uint32 numDigits;
710 for (numDigits = 0; numDigits < 5; numDigits++, q++)
711 {
712 if (isdigit(*q))
713 {
714 referenceChar = (referenceChar << 4);
715 referenceChar += (*q - '0');
716 kumpf 1.18 }
717 else if ((*q >= 'A') && (*q <= 'F'))
718 {
719 referenceChar = (referenceChar << 4);
720 referenceChar += (*q - 'A' + 10);
721 }
722 else if ((*q >= 'a') && (*q <= 'f'))
723 {
724 referenceChar = (referenceChar << 4);
725 referenceChar += (*q - 'a' + 10);
726 }
727 else if (*q == ';')
728 {
729 break;
730 }
731 else
732 {
733 throw XmlException(code, _line);
734 }
735 }
736
737 kumpf 1.18 // Hex number must be 1 - 4 digits
738 if ((numDigits == 0) || (numDigits > 4))
739 {
740 throw XmlException(code, _line);
741 }
742
743 // ATTN: Currently do not support 16-bit characters
744 if (referenceChar > 0xff)
745 {
746 // ATTN: Is there a good way to say "unsupported"?
747 throw XmlException(code, _line);
748 }
749
750 referenceLength = numDigits + 4;
751 }
752 else
753 {
754 // Decode a decimal character reference
755 Uint32 newChar = 0;
756 char* q = p+2;
757
758 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
759 Uint32 numDigits;
760 for (numDigits = 0; numDigits < 6; numDigits++, q++)
761 {
762 if (isdigit(*q))
763 {
764 newChar = (newChar * 10);
765 newChar += (*q - '0');
766 }
767 else if (*q == ';')
768 {
769 break;
770 }
771 else
772 {
773 throw XmlException(code, _line);
774 }
775 }
776
777 // Decimal number must be 1 - 5 digits and fit in 16 bits
778 if ((numDigits == 0) || (numDigits > 5) ||
779 kumpf 1.18 (newChar > 0xffff))
780 {
781 throw XmlException(code, _line);
782 }
783
784 // ATTN: Currently do not support 16-bit characters
785 if (newChar > 0xff)
786 {
787 // ATTN: Is there a good way to say "unsupported"?
788 throw XmlException(code, _line);
789 }
790
791 referenceChar = Uint16(newChar);
792 referenceLength = numDigits + 3;
793 }
794 }
795 else
796 {
797 // Check for entity reference
798 // ATTN: Inefficient if many entity references are supported
799 Uint32 i;
800 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
801 {
802 Uint32 length = _references[i].length;
803 const char* match = _references[i].match;
804
805 if (strncmp(p, _references[i].match, length) == 0)
806 {
807 referenceChar = _references[i].replacement;
808 referenceLength = length;
809 break;
810 }
811 }
812
813 if (i == _REFERENCES_SIZE)
814 {
815 // Didn't recognize the entity reference
816 // ATTN: Is there a good way to say "unsupported"?
817 throw XmlException(code, _line);
818 }
819 }
820
821 kumpf 1.18 // Replace the reference with the correct character
822 *p = (char)referenceChar;
823 char* q = p + referenceLength;
824 rem = rem - referenceLength + 1;
825 memmove(p + 1, q, rem);
|
826 mike 1.13 }
827 }
828 }
829
830 static const char _EMPTY_STRING[] = "";
831
832 void XmlParser::_getElement(char*& p, XmlEntry& entry)
833 {
834 entry.attributeCount = 0;
835
836 //--------------------------------------------------------------------------
837 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
838 //--------------------------------------------------------------------------
839
840 if (*p == '?')
841 {
842 entry.type = XmlEntry::XML_DECLARATION;
843 entry.text = ++p;
844
845 Boolean openCloseElement = false;
846
847 mike 1.13 if (_getElementName(p))
848 return;
849 }
850 else if (*p == '!')
851 {
852 p++;
853
854 // Expect a comment or CDATA:
855
856 if (p[0] == '-' && p[1] == '-')
857 {
858 p += 2;
859 entry.type = XmlEntry::COMMENT;
860 entry.text = p;
861 _getComment(p);
862 return;
863 }
864 else if (memcmp(p, "[CDATA[", 7) == 0)
865 {
866 p += 7;
867 entry.type = XmlEntry::CDATA;
868 mike 1.13 entry.text = p;
869 _getCData(p);
870 return;
871 }
872 else if (memcmp(p, "DOCTYPE", 7) == 0)
873 {
874 entry.type = XmlEntry::DOCTYPE;
875 entry.text = _EMPTY_STRING;
876 _getDocType(p);
877 return;
878 }
879 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
880 }
881 else if (*p == '/')
882 {
883 entry.type = XmlEntry::END_TAG;
884 entry.text = ++p;
885
886 if (!_getElementName(p))
887 throw(XmlException(XmlException::BAD_END_TAG, _line));
888
889 mike 1.13 return;
890 }
|
891 david 1.25 else if ((((*p >= 'A') && (*p <= 'Z')) ||
|
892 kumpf 1.24 ((*p >= 'a') && (*p <= 'z')) ||
893 (*p == '_')))
|
894 mike 1.13 {
895 entry.type = XmlEntry::START_TAG;
896 entry.text = p;
897
898 Boolean openCloseElement = false;
899
900 if (_getOpenElementName(p, openCloseElement))
901 {
902 if (openCloseElement)
903 entry.type = XmlEntry::EMPTY_TAG;
904 return;
905 }
906 }
907 else
908 throw XmlException(XmlException::BAD_START_TAG, _line);
909
910 //--------------------------------------------------------------------------
911 // Grab all the attributes:
912 //--------------------------------------------------------------------------
913
914 for (;;)
915 mike 1.13 {
916 if (entry.type == XmlEntry::XML_DECLARATION)
917 {
918 if (p[0] == '?' && p[1] == '>')
919 {
920 p += 2;
921 return;
922 }
923 }
924 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
925 {
926 entry.type = XmlEntry::EMPTY_TAG;
927 p += 2;
928 return;
929 }
930 else if (*p == '>')
931 {
932 p++;
933 return;
934 }
935
936 mike 1.13 XmlAttribute attr;
937 attr.name = p;
938 _getAttributeNameAndEqual(p);
939
940 if (*p != '"' && *p != '\'')
941 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
942
943 attr.value = p + 1;
944 _getAttributeValue(p);
945
946 if (entry.type == XmlEntry::XML_DECLARATION)
947 {
948 // The next thing must a space or a "?>":
949
950 if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
951 {
952 throw XmlException(
953 XmlException::BAD_ATTRIBUTE_VALUE, _line);
954 }
955 }
956 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
957 mike 1.13 {
958 // The next thing must be a space or a '>':
959
960 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
961 }
962
963 _skipWhitespace(p);
964
965 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
966 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
967
968 _substituteReferences((char*)attr.value);
969 entry.attributes[entry.attributeCount++] = attr;
970 }
971 }
972
973 static const char* _typeStrings[] =
974 {
975 "XML_DECLARATION",
976 "START_TAG",
977 "EMPTY_TAG",
978 mike 1.13 "END_TAG",
979 "COMMENT",
980 "CDATA",
981 "DOCTYPE",
982 "CONTENT"
983 };
984
985 void XmlEntry::print() const
986 {
987 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
988
989 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
990
991 if (needQuotes)
992 PEGASUS_STD(cout) << "\"";
993
994 _printValue(text);
995
996 if (needQuotes)
997 PEGASUS_STD(cout) << "\"";
998
999 mike 1.13 PEGASUS_STD(cout) << '\n';
1000
1001 for (Uint32 i = 0; i < attributeCount; i++)
1002 {
1003 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1004 _printValue(attributes[i].value);
1005 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1006 }
1007 }
1008
1009 const XmlAttribute* XmlEntry::findAttribute(
1010 const char* name) const
1011 {
1012 for (Uint32 i = 0; i < attributeCount; i++)
1013 {
1014 if (strcmp(attributes[i].name, name) == 0)
1015 return &attributes[i];
1016 }
1017
1018 return 0;
1019 }
1020 mike 1.13
// Find the first non-whitespace character (set first) and the end of the
// last non-whitespace character (set last to one past it). For example,
// consider this string:
//
//     " 87 "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. If the string is empty or contains only whitespace, both
// pointers are set to the terminating null.
static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    // Characters are converted to unsigned char before being passed to
    // isspace(); passing a negative char value (for example a byte of a
    // UTF-8 sequence) is undefined behavior.

    first = str;

    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        last = first;
        return;
    }

    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1050
1051 Boolean XmlEntry::getAttributeValue(
1052 const char* name,
1053 Uint32& value) const
1054 {
1055 const XmlAttribute* attr = findAttribute(name);
1056
1057 if (!attr)
1058 return false;
1059
1060 const char* first;
1061 const char* last;
1062 mike 1.13 _findEnds(attr->value, first, last);
1063
1064 char* end = 0;
1065 long tmp = strtol(first, &end, 10);
1066
1067 if (!end || end != last)
1068 return false;
1069
1070 value = Uint32(tmp);
1071 return true;
1072 }
1073
1074 Boolean XmlEntry::getAttributeValue(
1075 const char* name,
1076 Real32& value) const
1077 {
1078 const XmlAttribute* attr = findAttribute(name);
1079
1080 if (!attr)
1081 return false;
1082
1083 mike 1.13 const char* first;
1084 const char* last;
1085 _findEnds(attr->value, first, last);
1086
1087 char* end = 0;
1088 double tmp = strtod(first, &end);
1089
1090 if (!end || end != last)
1091 return false;
1092
1093 value = Uint32(tmp);
1094 return true;
1095 }
1096
1097 Boolean XmlEntry::getAttributeValue(
1098 const char* name,
1099 const char*& value) const
1100 {
1101 const XmlAttribute* attr = findAttribute(name);
1102
1103 if (!attr)
1104 mike 1.13 return false;
1105
1106 value = attr->value;
1107 return true;
1108 }
1109
1110 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1111 {
1112 const char* tmp;
1113
1114 if (!getAttributeValue(name, tmp))
1115 return false;
1116
|
1117 david 1.22 value = String(tmp,STRING_FLAG_UTF8);
|
1118 mike 1.13 return true;
1119 }
1120
1121 void XmlAppendCString(Array<Sint8>& out, const char* str)
1122 {
1123 out.append(str, strlen(str));
1124 }
1125
1126 PEGASUS_NAMESPACE_END
|