1 mike 1.13 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.13 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
26 // Modified By:
27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30 ////////////////////////////////////////////////////////////////////////////////
31 //
32 // XmlParser
33 //
34 // This file contains a simple non-validating XML parser. Here are
//     several rules for well-formed XML:
36 //
37 // 1. Documents must begin with an XML declaration:
38 //
39 // <?xml version="1.0" standalone="yes"?>
40 mike 1.13 //
41 // 2. Comments have the form:
42 //
43 // <!-- blah blah blah -->
44 //
45 // 3. The following entity references are supported:
46 //
//              &amp; - ampersand
//              &lt; - less-than
//              &gt; - greater-than
//              &quot; - full quote
//              &apos; - apostrophe
52 //
|
53 kumpf 1.18 // as well as character (numeric) references:
54
//              &#49; - decimal reference for character '1'
//              &#x31; - hexadecimal reference for character '1'
57 //
|
58 mike 1.13 // 4. Element names and attribute names take the following form:
59 //
//              [A-Za-z_][A-Za-z_0-9-.:]*
61 //
62 // 5. Arbitrary data (CDATA) can be enclosed like this:
63 //
64 // <![CDATA[
65 // ...
66 // ]]>
67 //
68 // 6. Element names and attributes names are case-sensitive.
69 //
70 // 7. XmlAttribute values must be delimited by full or half quotes.
71 // XmlAttribute values must be delimited.
72 //
73 // 8. <!DOCTYPE...>
74 //
75 // TODO:
76 //
|
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
79 // rules rather than references to files).
80 //
81 // Remove newlines from string literals:
82 //
83 // Example: <xyz x="hello
84 // world">
85 //
86 ////////////////////////////////////////////////////////////////////////////////
87
|
88 sage 1.14 #include <Pegasus/Common/Config.h>
|
89 mike 1.13 #include <cctype>
90 #include <cstdio>
91 #include <cstdlib>
92 #include <cstring>
93 #include "XmlParser.h"
94 #include "Logger.h"
|
95 chuck 1.19 #include "ExceptionRep.h"
|
96 mike 1.13
97 PEGASUS_NAMESPACE_BEGIN
98
99 #define PEGASUS_ARRAY_T XmlEntry
100 # include "ArrayImpl.h"
101 #undef PEGASUS_ARRAY_T
102
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
114 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 mike 1.13 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
122 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
143
// Removes redundant whitespace from the given string, in place:
//
//   - leading and trailing whitespace is dropped;
//   - every run of two or more whitespace characters is replaced by a
//     single space.
//
// A lone whitespace character between two words is left as it is (a single
// tab stays a tab). The result is never longer than the input.
//
// text - writable, null-terminated buffer.
static void _normalize(char* text)
{
    char* p = text;

    // "end" always addresses the null terminator of the current string, so
    // every memmove below copies exactly the remaining characters plus the
    // terminator.
    char* end = text + strlen(text);

    // Remove leading spaces. The casts through unsigned char matter: the
    // <cctype> functions have undefined behavior for negative arguments,
    // which plain char produces for bytes >= 0x80.

    while (isspace(static_cast<unsigned char>(*p)))
        p++;

    if (p != text)
    {
        memmove(text, p, end - p + 1);
        end -= (p - text);
    }

    p = text;

    // Look for sequences of more than one space and remove all but one.

    for (;;)
    {
        // Advance to the next space:

        while (*p && !isspace(static_cast<unsigned char>(*p)))
            p++;

        if (!*p)
            break;

        // Advance to the next non-space; q remembers where the run began:

        char* q = p++;

        while (isspace(static_cast<unsigned char>(*p)))
            p++;

        // Discard trailing spaces (if we are at the end):

        if (!*p)
        {
            *q = '\0';
            break;
        }

        // Collapse a run of two or more whitespace characters to one space:

        if (p - q > 1)
        {
            *q++ = ' ';
            memmove(q, p, end - p + 1);
            end -= (p - q);
            p = q;
        }
    }
}
201
202 ////////////////////////////////////////////////////////////////////////////////
203 //
204 // XmlException
205 //
206 ////////////////////////////////////////////////////////////////////////////////
207
// Default (English) message text for each XML error code.
//
// _xmlMessages and _xmlKeys are parallel tables: both are indexed by
// (error code - 1), so they must list the same codes in the same order.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};

// Message-bundle key for each XML error code, in the same order as
// _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
249
250 // l10n TODO replace _formMessage with the commented one and uncomment
251 // the new constructors
252 /*
|
253 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
254 {
255 String result = _xmlMessages[Uint32(code) - 1];
256
257 char buffer[32];
258 sprintf(buffer, "%d", line);
259 result.append(": on line ");
260 result.append(buffer);
261
262 if (message.size())
263 {
264 result.append(": ");
265 result.append(message);
266 }
267
268 return result;
269 }
|
270 chuck 1.19 */
271
272 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
273 {
274 String dftMsg = _xmlMessages[Uint32(code) - 1];
275 String key = _xmlKeys[Uint32(code) - 1];
276 String msg = message;
277
278 dftMsg.append(": on line $0");
279 if (message.size())
280 {
|
281 humberto 1.20 msg = ": " + msg;
|
282 chuck 1.19 dftMsg.append("$1");
283 }
284
285 return MessageLoaderParms(key, dftMsg, line ,msg);
286 }
287
288 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
289 {
290 String dftMsg = _xmlMessages[Uint32(code) - 1];
291 String key = _xmlKeys[Uint32(code) - 1];
292
293 dftMsg.append(": on line $0");
294
295 return MessageLoaderParms(key, dftMsg, line);
296 }
297
|
298 mike 1.13
299 XmlException::XmlException(
300 XmlException::Code code,
301 Uint32 lineNumber,
302 const String& message)
303 : Exception(_formMessage(code, lineNumber, message))
304 {
305
306 }
307
|
308 chuck 1.19
309 XmlException::XmlException(
310 XmlException::Code code,
311 Uint32 lineNumber,
312 MessageLoaderParms& msgParms)
313 : Exception(_formPartialMessage(code, lineNumber))
314 {
|
315 humberto 1.21 if (msgParms.default_msg.size())
316 {
317 msgParms.default_msg = ": " + msgParms.default_msg;
318 }
|
319 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
320 }
321
322
|
323 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
324 //
325 // XmlValidationError
326 //
327 ////////////////////////////////////////////////////////////////////////////////
328
329 XmlValidationError::XmlValidationError(
330 Uint32 lineNumber,
331 const String& message)
332 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
333 {
334
335 }
336
|
337 chuck 1.19
338 XmlValidationError::XmlValidationError(
339 Uint32 lineNumber,
340 MessageLoaderParms& msgParms)
341 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
342 {
343
344 }
345
346
|
347 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
348 //
349 // XmlSemanticError
350 //
351 ////////////////////////////////////////////////////////////////////////////////
352
353 XmlSemanticError::XmlSemanticError(
354 Uint32 lineNumber,
355 const String& message)
356 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
357 {
358
359 }
|
360 chuck 1.19
361
362 XmlSemanticError::XmlSemanticError(
363 Uint32 lineNumber,
364 MessageLoaderParms& msgParms)
365 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
366 {
367
368 }
369
|
370 mike 1.13
371 ////////////////////////////////////////////////////////////////////////////////
372 //
373 // XmlParser
374 //
375 ////////////////////////////////////////////////////////////////////////////////
376
377 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
378 _restoreChar('\0'), _foundRoot(false)
379 {
380
381 }
382
383 Boolean XmlParser::next(XmlEntry& entry)
384 {
385 if (!_putBackStack.isEmpty())
386 {
387 entry = _putBackStack.top();
388 _putBackStack.pop();
389 return true;
390 }
391 mike 1.13
392 // If a character was overwritten with a null-terminator the last
393 // time this routine was called, then put back that character. Before
394 // exiting of course, restore the null-terminator.
395
396 char* nullTerminator = 0;
397
398 if (_restoreChar && !*_current)
399 {
400 nullTerminator = _current;
401 *_current = _restoreChar;
402 _restoreChar = '\0';
403 }
404
405 // Skip over any whitespace:
406
407 _skipWhitespace(_current);
408
409 if (!*_current)
410 {
411 if (nullTerminator)
412 mike 1.13 *nullTerminator = '\0';
413
414 if (!_stack.isEmpty())
415 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
416
417 return false;
418 }
419
420 // Either a "<...>" or content begins next:
421
422 if (*_current == '<')
423 {
424 _current++;
425 _getElement(_current, entry);
426
427 if (nullTerminator)
428 *nullTerminator = '\0';
429
430 if (entry.type == XmlEntry::START_TAG)
431 {
432 if (_stack.isEmpty() && _foundRoot)
433 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
434
435 _foundRoot = true;
436 _stack.push((char*)entry.text);
437 }
438 else if (entry.type == XmlEntry::END_TAG)
439 {
440 if (_stack.isEmpty())
441 throw XmlException(XmlException::START_END_MISMATCH, _line);
442
443 if (strcmp(_stack.top(), entry.text) != 0)
444 throw XmlException(XmlException::START_END_MISMATCH, _line);
445
446 _stack.pop();
447 }
448
449 return true;
450 }
451 else
452 {
453 entry.type = XmlEntry::CONTENT;
454 mike 1.13 entry.text = _current;
455 _getContent(_current);
456 _restoreChar = *_current;
457 *_current = '\0';
458
459 if (nullTerminator)
460 *nullTerminator = '\0';
461
462 _substituteReferences((char*)entry.text);
463 _normalize((char*)entry.text);
464
465 return true;
466 }
467 }
468
469 void XmlParser::putBack(XmlEntry& entry)
470 {
471 _putBackStack.push(entry);
472 }
473
474 XmlParser::~XmlParser()
475 mike 1.13 {
476 // Nothing to do!
477 }
478
479 void XmlParser::_skipWhitespace(char*& p)
480 {
481 while (*p && isspace(*p))
482 {
483 if (*p == '\n')
484 _line++;
485
486 p++;
487 }
488 }
489
490 Boolean XmlParser::_getElementName(char*& p)
491 {
492 if (!isalpha(*p) && *p != '_')
493 throw XmlException(XmlException::BAD_START_TAG, _line);
494
495 while (*p &&
496 mike 1.13 (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
497 p++;
498
499 // The next character must be a space:
500
501 if (isspace(*p))
502 {
503 *p++ = '\0';
504 _skipWhitespace(p);
505 }
506
507 if (*p == '>')
508 {
509 *p++ = '\0';
510 return true;
511 }
512
513 return false;
514 }
515
516 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
517 mike 1.13 {
518 openCloseElement = false;
519
520 if (!isalpha(*p) && *p != '_')
521 throw XmlException(XmlException::BAD_START_TAG, _line);
522
523 while (*p &&
524 (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
525 p++;
526
527 // The next character must be a space:
528
529 if (isspace(*p))
530 {
531 *p++ = '\0';
532 _skipWhitespace(p);
533 }
534
535 if (*p == '>')
536 {
537 *p++ = '\0';
538 mike 1.13 return true;
539 }
540
541 if (p[0] == '/' && p[1] == '>')
542 {
543 openCloseElement = true;
544 *p = '\0';
545 p += 2;
546 return true;
547 }
548
549 return false;
550 }
551
552 void XmlParser::_getAttributeNameAndEqual(char*& p)
553 {
554 if (!isalpha(*p) && *p != '_')
555 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
556
557 while (*p &&
558 (isalnum(*p) || *p == '_' || *p == '-' || *p == ':' || *p == '.'))
559 mike 1.13 p++;
560
561 char* term = p;
562
563 _skipWhitespace(p);
564
565 if (*p != '=')
566 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
567
568 p++;
569
570 _skipWhitespace(p);
571
572 *term = '\0';
573 }
574
575 void XmlParser::_getAttributeValue(char*& p)
576 {
577 // ATTN-B: handle values contained in semiquotes:
578
579 if (*p != '"' && *p != '\'')
580 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
581
582 char startChar = *p++;
583
584 while (*p && *p != startChar)
585 p++;
586
587 if (*p != startChar)
588 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
589
590 *p++ = '\0';
591 }
592
593 void XmlParser::_getComment(char*& p)
594 {
595 // Now p points to first non-whitespace character beyond "<--" sequence:
596
597 for (; *p; p++)
598 {
599 if (p[0] == '-' && p[1] == '-')
600 {
601 mike 1.13 if (p[2] != '>')
602 {
603 throw XmlException(
604 XmlException::MINUS_MINUS_IN_COMMENT, _line);
605 }
606
607 // Find end of comment (excluding whitespace):
608
609 *p = '\0';
610 p += 3;
611 return;
612 }
613 }
614
615 // If it got this far, then the comment is unterminated:
616
617 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
618 }
619
620 void XmlParser::_getCData(char*& p)
621 {
622 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
623
624 for (; *p; p++)
625 {
626 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
627 {
628 *p = '\0';
629 p += 3;
630 return;
631 }
632 else if (*p == '\n')
633 _line++;
634 }
635
636 // If it got this far, then the comment is unterminated:
637
638 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
639 }
640
641 void XmlParser::_getDocType(char*& p)
642 {
643 mike 1.13 // Just ignore the DOCTYPE command for now:
644
645 for (; *p && *p != '>'; p++)
646 {
647 if (*p == '\n')
648 _line++;
649 }
650
651 if (*p != '>')
652 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
653
654 p++;
655 }
656
657 void XmlParser::_getContent(char*& p)
658 {
659 while (*p && *p != '<')
660 {
661 if (*p == '\n')
662 _line++;
663
664 mike 1.13 p++;
665 }
666 }
667
668 void XmlParser::_substituteReferences(char* text)
669 {
670 Uint32 rem = strlen(text);
671
672 for (char* p = text; *p; p++, rem--)
673 {
674 if (*p == '&')
675 {
|
676 kumpf 1.18 // Process character or entity reference
|
677 mike 1.13
|
678 kumpf 1.18 Uint16 referenceChar = 0;
679 Uint32 referenceLength = 0;
680 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
681
682 if (*(p+1) == '#')
683 {
684 // Found a character (numeric) reference
685 // Determine whether it is decimal or hex
686 if (*(p+2) == 'x')
687 {
688 // Decode a hexadecimal character reference
689 char* q = p+3;
690
691 // At most four digits are allowed, plus trailing ';'
692 Uint32 numDigits;
693 for (numDigits = 0; numDigits < 5; numDigits++, q++)
694 {
695 if (isdigit(*q))
696 {
697 referenceChar = (referenceChar << 4);
698 referenceChar += (*q - '0');
699 kumpf 1.18 }
700 else if ((*q >= 'A') && (*q <= 'F'))
701 {
702 referenceChar = (referenceChar << 4);
703 referenceChar += (*q - 'A' + 10);
704 }
705 else if ((*q >= 'a') && (*q <= 'f'))
706 {
707 referenceChar = (referenceChar << 4);
708 referenceChar += (*q - 'a' + 10);
709 }
710 else if (*q == ';')
711 {
712 break;
713 }
714 else
715 {
716 throw XmlException(code, _line);
717 }
718 }
719
720 kumpf 1.18 // Hex number must be 1 - 4 digits
721 if ((numDigits == 0) || (numDigits > 4))
722 {
723 throw XmlException(code, _line);
724 }
725
726 // ATTN: Currently do not support 16-bit characters
727 if (referenceChar > 0xff)
728 {
729 // ATTN: Is there a good way to say "unsupported"?
730 throw XmlException(code, _line);
731 }
732
733 referenceLength = numDigits + 4;
734 }
735 else
736 {
737 // Decode a decimal character reference
738 Uint32 newChar = 0;
739 char* q = p+2;
740
741 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
742 Uint32 numDigits;
743 for (numDigits = 0; numDigits < 6; numDigits++, q++)
744 {
745 if (isdigit(*q))
746 {
747 newChar = (newChar * 10);
748 newChar += (*q - '0');
749 }
750 else if (*q == ';')
751 {
752 break;
753 }
754 else
755 {
756 throw XmlException(code, _line);
757 }
758 }
759
760 // Decimal number must be 1 - 5 digits and fit in 16 bits
761 if ((numDigits == 0) || (numDigits > 5) ||
762 kumpf 1.18 (newChar > 0xffff))
763 {
764 throw XmlException(code, _line);
765 }
766
767 // ATTN: Currently do not support 16-bit characters
768 if (newChar > 0xff)
769 {
770 // ATTN: Is there a good way to say "unsupported"?
771 throw XmlException(code, _line);
772 }
773
774 referenceChar = Uint16(newChar);
775 referenceLength = numDigits + 3;
776 }
777 }
778 else
779 {
780 // Check for entity reference
781 // ATTN: Inefficient if many entity references are supported
782 Uint32 i;
783 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
784 {
785 Uint32 length = _references[i].length;
786 const char* match = _references[i].match;
787
788 if (strncmp(p, _references[i].match, length) == 0)
789 {
790 referenceChar = _references[i].replacement;
791 referenceLength = length;
792 break;
793 }
794 }
795
796 if (i == _REFERENCES_SIZE)
797 {
798 // Didn't recognize the entity reference
799 // ATTN: Is there a good way to say "unsupported"?
800 throw XmlException(code, _line);
801 }
802 }
803
804 kumpf 1.18 // Replace the reference with the correct character
805 *p = (char)referenceChar;
806 char* q = p + referenceLength;
807 rem = rem - referenceLength + 1;
808 memmove(p + 1, q, rem);
|
809 mike 1.13 }
810 }
811 }
812
813 static const char _EMPTY_STRING[] = "";
814
815 void XmlParser::_getElement(char*& p, XmlEntry& entry)
816 {
817 entry.attributeCount = 0;
818
819 //--------------------------------------------------------------------------
820 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
821 //--------------------------------------------------------------------------
822
823 if (*p == '?')
824 {
825 entry.type = XmlEntry::XML_DECLARATION;
826 entry.text = ++p;
827
828 Boolean openCloseElement = false;
829
830 mike 1.13 if (_getElementName(p))
831 return;
832 }
833 else if (*p == '!')
834 {
835 p++;
836
837 // Expect a comment or CDATA:
838
839 if (p[0] == '-' && p[1] == '-')
840 {
841 p += 2;
842 entry.type = XmlEntry::COMMENT;
843 entry.text = p;
844 _getComment(p);
845 return;
846 }
847 else if (memcmp(p, "[CDATA[", 7) == 0)
848 {
849 p += 7;
850 entry.type = XmlEntry::CDATA;
851 mike 1.13 entry.text = p;
852 _getCData(p);
853 return;
854 }
855 else if (memcmp(p, "DOCTYPE", 7) == 0)
856 {
857 entry.type = XmlEntry::DOCTYPE;
858 entry.text = _EMPTY_STRING;
859 _getDocType(p);
860 return;
861 }
862 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
863 }
864 else if (*p == '/')
865 {
866 entry.type = XmlEntry::END_TAG;
867 entry.text = ++p;
868
869 if (!_getElementName(p))
870 throw(XmlException(XmlException::BAD_END_TAG, _line));
871
872 mike 1.13 return;
873 }
874 else if (isalpha(*p) || *p == '_')
875 {
876 entry.type = XmlEntry::START_TAG;
877 entry.text = p;
878
879 Boolean openCloseElement = false;
880
881 if (_getOpenElementName(p, openCloseElement))
882 {
883 if (openCloseElement)
884 entry.type = XmlEntry::EMPTY_TAG;
885 return;
886 }
887 }
888 else
889 throw XmlException(XmlException::BAD_START_TAG, _line);
890
891 //--------------------------------------------------------------------------
892 // Grab all the attributes:
893 mike 1.13 //--------------------------------------------------------------------------
894
895 for (;;)
896 {
897 if (entry.type == XmlEntry::XML_DECLARATION)
898 {
899 if (p[0] == '?' && p[1] == '>')
900 {
901 p += 2;
902 return;
903 }
904 }
905 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
906 {
907 entry.type = XmlEntry::EMPTY_TAG;
908 p += 2;
909 return;
910 }
911 else if (*p == '>')
912 {
913 p++;
914 mike 1.13 return;
915 }
916
917 XmlAttribute attr;
918 attr.name = p;
919 _getAttributeNameAndEqual(p);
920
921 if (*p != '"' && *p != '\'')
922 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
923
924 attr.value = p + 1;
925 _getAttributeValue(p);
926
927 if (entry.type == XmlEntry::XML_DECLARATION)
928 {
929 // The next thing must a space or a "?>":
930
931 if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
932 {
933 throw XmlException(
934 XmlException::BAD_ATTRIBUTE_VALUE, _line);
935 mike 1.13 }
936 }
937 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
938 {
939 // The next thing must be a space or a '>':
940
941 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
942 }
943
944 _skipWhitespace(p);
945
946 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
947 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
948
949 _substituteReferences((char*)attr.value);
950 entry.attributes[entry.attributeCount++] = attr;
951 }
952 }
953
954 static const char* _typeStrings[] =
955 {
956 mike 1.13 "XML_DECLARATION",
957 "START_TAG",
958 "EMPTY_TAG",
959 "END_TAG",
960 "COMMENT",
961 "CDATA",
962 "DOCTYPE",
963 "CONTENT"
964 };
965
966 void XmlEntry::print() const
967 {
968 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
969
970 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
971
972 if (needQuotes)
973 PEGASUS_STD(cout) << "\"";
974
975 _printValue(text);
976
977 mike 1.13 if (needQuotes)
978 PEGASUS_STD(cout) << "\"";
979
980 PEGASUS_STD(cout) << '\n';
981
982 for (Uint32 i = 0; i < attributeCount; i++)
983 {
984 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
985 _printValue(attributes[i].value);
986 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
987 }
988 }
989
990 const XmlAttribute* XmlEntry::findAttribute(
991 const char* name) const
992 {
993 for (Uint32 i = 0; i < attributeCount; i++)
994 {
995 if (strcmp(attributes[i].name, name) == 0)
996 return &attributes[i];
997 }
998 mike 1.13
999 return 0;
1000 }
1001
// Locates the non-whitespace core of a string. "first" is set to the first
// non-whitespace character and "last" to one past the final non-whitespace
// character. For example, given
//
//     "   87     "
//
// first points to '8' and last points one beyond '7'. When the string is
// empty or contains only whitespace, first and last both point to the null
// terminator.
static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    first = str;

    // Cast through unsigned char: isspace() has undefined behavior for
    // negative values, which plain char yields for bytes >= 0x80.
    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        last = first;
        return;
    }

    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1031
1032 Boolean XmlEntry::getAttributeValue(
1033 const char* name,
1034 Uint32& value) const
1035 {
1036 const XmlAttribute* attr = findAttribute(name);
1037
1038 if (!attr)
1039 return false;
1040 mike 1.13
1041 const char* first;
1042 const char* last;
1043 _findEnds(attr->value, first, last);
1044
1045 char* end = 0;
1046 long tmp = strtol(first, &end, 10);
1047
1048 if (!end || end != last)
1049 return false;
1050
1051 value = Uint32(tmp);
1052 return true;
1053 }
1054
1055 Boolean XmlEntry::getAttributeValue(
1056 const char* name,
1057 Real32& value) const
1058 {
1059 const XmlAttribute* attr = findAttribute(name);
1060
1061 mike 1.13 if (!attr)
1062 return false;
1063
1064 const char* first;
1065 const char* last;
1066 _findEnds(attr->value, first, last);
1067
1068 char* end = 0;
1069 double tmp = strtod(first, &end);
1070
1071 if (!end || end != last)
1072 return false;
1073
1074 value = Uint32(tmp);
1075 return true;
1076 }
1077
1078 Boolean XmlEntry::getAttributeValue(
1079 const char* name,
1080 const char*& value) const
1081 {
1082 mike 1.13 const XmlAttribute* attr = findAttribute(name);
1083
1084 if (!attr)
1085 return false;
1086
1087 value = attr->value;
1088 return true;
1089 }
1090
1091 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1092 {
1093 const char* tmp;
1094
1095 if (!getAttributeValue(name, tmp))
1096 return false;
1097
1098 value = tmp;
1099 return true;
1100 }
1101
1102 void XmlAppendCString(Array<Sint8>& out, const char* str)
1103 mike 1.13 {
1104 out.append(str, strlen(str));
1105 }
1106
1107 PEGASUS_NAMESPACE_END
|