1 mike 1.13 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.13 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
26 // Modified By:
27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30 ////////////////////////////////////////////////////////////////////////////////
31 //
32 // XmlParser
33 //
//     This file contains a simple non-validating XML parser. Here are
//     several rules for well-formed XML:
36 //
37 // 1. Documents must begin with an XML declaration:
38 //
39 // <?xml version="1.0" standalone="yes"?>
40 mike 1.13 //
41 // 2. Comments have the form:
42 //
43 // <!-- blah blah blah -->
44 //
45 // 3. The following entity references are supported:
46 //
//         &amp; - ampersand
//         &lt; - less-than
//         &gt; - greater-than
//         &quot; - full quote
//         &apos; - apostrophe
52 //
|
53 kumpf 1.18 // as well as character (numeric) references:
54
//         &#49; - decimal reference for character '1'
//         &#x31; - hexadecimal reference for character '1'
57 //
|
58 mike 1.13 // 4. Element names and attribute names take the following form:
59 //
//         [A-Za-z_][A-Za-z_0-9-.:]*
61 //
62 // 5. Arbitrary data (CDATA) can be enclosed like this:
63 //
64 // <![CDATA[
65 // ...
66 // ]]>
67 //
68 // 6. Element names and attributes names are case-sensitive.
69 //
70 // 7. XmlAttribute values must be delimited by full or half quotes.
71 // XmlAttribute values must be delimited.
72 //
73 // 8. <!DOCTYPE...>
74 //
75 // TODO:
76 //
|
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
79 // rules rather than references to files).
80 //
81 // Remove newlines from string literals:
82 //
83 // Example: <xyz x="hello
84 // world">
85 //
86 ////////////////////////////////////////////////////////////////////////////////
87
|
88 sage 1.14 #include <Pegasus/Common/Config.h>
|
89 mike 1.13 #include <cctype>
90 #include <cstdio>
91 #include <cstdlib>
92 #include <cstring>
93 #include "XmlParser.h"
94 #include "Logger.h"
|
95 chuck 1.19 #include "ExceptionRep.h"
|
96 mike 1.13
97 PEGASUS_NAMESPACE_BEGIN
98
99 #define PEGASUS_ARRAY_T XmlEntry
100 # include "ArrayImpl.h"
101 #undef PEGASUS_ARRAY_T
102
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
114 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 mike 1.13 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
122 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
143
// Remove all redundant whitespace from the given string, in place:
//
//   - leading whitespace is removed;
//   - trailing whitespace is removed;
//   - every run of two or more whitespace characters is replaced by a
//     single ' ';
//   - a lone whitespace character between two non-space characters is kept
//     as it is (for example a single tab stays a tab).
//
// The result is never longer than the input, so the buffer is compacted
// with one read pointer and one write pointer.

static void _normalize(char* text)
{
    const char* src = text;
    char* dst = text;

    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so every byte is converted before it is classified; UTF-8
    // content bytes would otherwise be passed as negative values.

    // Remove leading spaces:

    while (isspace(static_cast<unsigned char>(*src)))
        src++;

    while (*src)
    {
        if (!isspace(static_cast<unsigned char>(*src)))
        {
            *dst++ = *src++;
            continue;
        }

        // Measure the whitespace run starting here:

        const char* runStart = src;

        while (isspace(static_cast<unsigned char>(*src)))
            src++;

        // Discard trailing spaces (if we are at the end):

        if (!*src)
            break;

        // Collapse a run of several characters into one blank; keep a
        // single whitespace character unchanged:

        *dst++ = (src - runStart > 1) ? ' ' : *runStart;
    }

    *dst = '\0';
}
201
202 ////////////////////////////////////////////////////////////////////////////////
203 //
204 // XmlException
205 //
206 ////////////////////////////////////////////////////////////////////////////////
207
// Default (English) message text for each XML exception code. The table is
// indexed with (code - 1) by _formMessage() and _formPartialMessage(), so
// the order of the entries must follow the order of the codes and must stay
// parallel to _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
228
|
// Message-catalog keys, one per XML exception code. Indexed with (code - 1)
// exactly like _xmlMessages, so entry N here is the key for the default
// text in _xmlMessages[N].
static const char* _xmlKeys[] =
{
    // Element and attribute syntax
    "Common.XmlParser.BAD_START_TAG",
    "Common.XmlParser.BAD_END_TAG",
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",

    // Comments, CDATA and DOCTYPE sections
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",
    "Common.XmlParser.UNTERMINATED_COMMENT",
    "Common.XmlParser.UNTERMINATED_CDATA",
    "Common.XmlParser.UNTERMINATED_DOCTYPE",

    // Limits and references
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",
    "Common.XmlParser.MALFORMED_REFERENCE",
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",

    // Document structure
    "Common.XmlParser.START_END_MISMATCH",
    "Common.XmlParser.UNCLOSED_TAGS",
    "Common.XmlParser.MULTIPLE_ROOTS",

    // Errors raised by higher layers
    "Common.XmlParser.VALIDATION_ERROR",
    "Common.XmlParser.SEMANTIC_ERROR"
};
249
250 // l10n TODO replace _formMessage with the commented one and uncomment
251 // the new constructors
252 /*
|
253 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
254 {
255 String result = _xmlMessages[Uint32(code) - 1];
256
257 char buffer[32];
258 sprintf(buffer, "%d", line);
259 result.append(": on line ");
260 result.append(buffer);
261
262 if (message.size())
263 {
264 result.append(": ");
265 result.append(message);
266 }
267
268 return result;
269 }
|
270 chuck 1.19 */
271
272 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
273 {
274 String dftMsg = _xmlMessages[Uint32(code) - 1];
275 String key = _xmlKeys[Uint32(code) - 1];
276 String msg = message;
277
278 dftMsg.append(": on line $0");
279 if (message.size())
280 {
|
281 humberto 1.20 msg = ": " + msg;
|
282 chuck 1.19 dftMsg.append("$1");
283 }
284
285 return MessageLoaderParms(key, dftMsg, line ,msg);
286 }
287
288 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
289 {
290 String dftMsg = _xmlMessages[Uint32(code) - 1];
291 String key = _xmlKeys[Uint32(code) - 1];
292
293 dftMsg.append(": on line $0");
294
295 return MessageLoaderParms(key, dftMsg, line);
296 }
297
|
298 mike 1.13
299 XmlException::XmlException(
300 XmlException::Code code,
301 Uint32 lineNumber,
302 const String& message)
303 : Exception(_formMessage(code, lineNumber, message))
304 {
305
306 }
307
|
308 chuck 1.19
309 XmlException::XmlException(
310 XmlException::Code code,
311 Uint32 lineNumber,
312 MessageLoaderParms& msgParms)
313 : Exception(_formPartialMessage(code, lineNumber))
314 {
|
315 humberto 1.21 if (msgParms.default_msg.size())
316 {
317 msgParms.default_msg = ": " + msgParms.default_msg;
318 }
|
319 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
320 }
321
322
|
323 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
324 //
325 // XmlValidationError
326 //
327 ////////////////////////////////////////////////////////////////////////////////
328
329 XmlValidationError::XmlValidationError(
330 Uint32 lineNumber,
331 const String& message)
332 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
333 {
334
335 }
336
|
337 chuck 1.19
338 XmlValidationError::XmlValidationError(
339 Uint32 lineNumber,
340 MessageLoaderParms& msgParms)
341 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
342 {
343
344 }
345
346
|
347 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
348 //
349 // XmlSemanticError
350 //
351 ////////////////////////////////////////////////////////////////////////////////
352
353 XmlSemanticError::XmlSemanticError(
354 Uint32 lineNumber,
355 const String& message)
356 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
357 {
358
359 }
|
360 chuck 1.19
361
362 XmlSemanticError::XmlSemanticError(
363 Uint32 lineNumber,
364 MessageLoaderParms& msgParms)
365 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
366 {
367
368 }
369
|
370 mike 1.13
371 ////////////////////////////////////////////////////////////////////////////////
372 //
373 // XmlParser
374 //
375 ////////////////////////////////////////////////////////////////////////////////
376
377 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
378 _restoreChar('\0'), _foundRoot(false)
379 {
380
381 }
382
383 Boolean XmlParser::next(XmlEntry& entry)
384 {
385 if (!_putBackStack.isEmpty())
386 {
387 entry = _putBackStack.top();
388 _putBackStack.pop();
389 return true;
390 }
391 mike 1.13
392 // If a character was overwritten with a null-terminator the last
393 // time this routine was called, then put back that character. Before
394 // exiting of course, restore the null-terminator.
395
396 char* nullTerminator = 0;
397
398 if (_restoreChar && !*_current)
399 {
400 nullTerminator = _current;
401 *_current = _restoreChar;
402 _restoreChar = '\0';
403 }
404
405 // Skip over any whitespace:
406
407 _skipWhitespace(_current);
408
409 if (!*_current)
410 {
411 if (nullTerminator)
412 mike 1.13 *nullTerminator = '\0';
413
414 if (!_stack.isEmpty())
415 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
416
417 return false;
418 }
419
420 // Either a "<...>" or content begins next:
421
422 if (*_current == '<')
423 {
424 _current++;
425 _getElement(_current, entry);
426
427 if (nullTerminator)
428 *nullTerminator = '\0';
429
430 if (entry.type == XmlEntry::START_TAG)
431 {
432 if (_stack.isEmpty() && _foundRoot)
433 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
434
435 _foundRoot = true;
436 _stack.push((char*)entry.text);
437 }
438 else if (entry.type == XmlEntry::END_TAG)
439 {
440 if (_stack.isEmpty())
441 throw XmlException(XmlException::START_END_MISMATCH, _line);
442
443 if (strcmp(_stack.top(), entry.text) != 0)
444 throw XmlException(XmlException::START_END_MISMATCH, _line);
445
446 _stack.pop();
447 }
448
449 return true;
450 }
451 else
452 {
453 entry.type = XmlEntry::CONTENT;
454 mike 1.13 entry.text = _current;
455 _getContent(_current);
456 _restoreChar = *_current;
457 *_current = '\0';
458
459 if (nullTerminator)
460 *nullTerminator = '\0';
461
462 _substituteReferences((char*)entry.text);
463 _normalize((char*)entry.text);
464
465 return true;
466 }
467 }
468
469 void XmlParser::putBack(XmlEntry& entry)
470 {
471 _putBackStack.push(entry);
472 }
473
474 XmlParser::~XmlParser()
475 mike 1.13 {
476 // Nothing to do!
477 }
478
479 void XmlParser::_skipWhitespace(char*& p)
480 {
481 while (*p && isspace(*p))
482 {
483 if (*p == '\n')
484 _line++;
485
486 p++;
487 }
488 }
489
490 Boolean XmlParser::_getElementName(char*& p)
491 {
|
492 david 1.22 if (!String::isUTF8(p))
|
493 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
494
|
495 david 1.22 while ((*p) &&
496 (((*p >= 'A') && (*p <= 'Z')) ||
497 ((*p >= 'a') && (*p <= 'z')) ||
498 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
499 mike 1.13 p++;
500
501 // The next character must be a space:
502
503 if (isspace(*p))
504 {
505 *p++ = '\0';
506 _skipWhitespace(p);
507 }
508
509 if (*p == '>')
510 {
511 *p++ = '\0';
512 return true;
513 }
514
515 return false;
516 }
517
518 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
519 {
520 mike 1.13 openCloseElement = false;
521
|
522 david 1.22 if (!String::isUTF8(p))
|
523 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
524
|
525 david 1.22 while ((*p) &&
526 (((*p >= 'A') && (*p <= 'Z')) ||
527 ((*p >= 'a') && (*p <= 'z')) ||
528 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
529 mike 1.13 p++;
530
531 // The next character must be a space:
532
533 if (isspace(*p))
534 {
535 *p++ = '\0';
536 _skipWhitespace(p);
537 }
538
539 if (*p == '>')
540 {
541 *p++ = '\0';
542 return true;
543 }
544
545 if (p[0] == '/' && p[1] == '>')
546 {
547 openCloseElement = true;
548 *p = '\0';
549 p += 2;
550 mike 1.13 return true;
551 }
552
553 return false;
554 }
555
556 void XmlParser::_getAttributeNameAndEqual(char*& p)
557 {
|
558 david 1.22 if (!String::isUTF8(p))
|
559 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
560
|
561 david 1.22 while ((*p) &&
562 (((*p >= 'A') && (*p <= 'Z')) ||
563 ((*p >= 'a') && (*p <= 'z')) ||
564 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
565 mike 1.13 p++;
566
567 char* term = p;
568
569 _skipWhitespace(p);
570
571 if (*p != '=')
572 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
573
574 p++;
575
576 _skipWhitespace(p);
577
578 *term = '\0';
579 }
580
581 void XmlParser::_getAttributeValue(char*& p)
582 {
583 // ATTN-B: handle values contained in semiquotes:
584
585 if (*p != '"' && *p != '\'')
586 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
587
588 char startChar = *p++;
589
590 while (*p && *p != startChar)
591 p++;
592
593 if (*p != startChar)
594 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
595
596 *p++ = '\0';
597 }
598
599 void XmlParser::_getComment(char*& p)
600 {
601 // Now p points to first non-whitespace character beyond "<--" sequence:
602
603 for (; *p; p++)
604 {
605 if (p[0] == '-' && p[1] == '-')
606 {
607 mike 1.13 if (p[2] != '>')
608 {
609 throw XmlException(
610 XmlException::MINUS_MINUS_IN_COMMENT, _line);
611 }
612
613 // Find end of comment (excluding whitespace):
614
615 *p = '\0';
616 p += 3;
617 return;
618 }
619 }
620
621 // If it got this far, then the comment is unterminated:
622
623 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
624 }
625
626 void XmlParser::_getCData(char*& p)
627 {
628 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
629
630 for (; *p; p++)
631 {
632 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
633 {
634 *p = '\0';
635 p += 3;
636 return;
637 }
638 else if (*p == '\n')
639 _line++;
640 }
641
642 // If it got this far, then the comment is unterminated:
643
644 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
645 }
646
647 void XmlParser::_getDocType(char*& p)
648 {
649 mike 1.13 // Just ignore the DOCTYPE command for now:
650
651 for (; *p && *p != '>'; p++)
652 {
653 if (*p == '\n')
654 _line++;
655 }
656
657 if (*p != '>')
658 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
659
660 p++;
661 }
662
663 void XmlParser::_getContent(char*& p)
664 {
665 while (*p && *p != '<')
666 {
667 if (*p == '\n')
668 _line++;
669
670 mike 1.13 p++;
671 }
672 }
673
674 void XmlParser::_substituteReferences(char* text)
675 {
676 Uint32 rem = strlen(text);
677
678 for (char* p = text; *p; p++, rem--)
679 {
680 if (*p == '&')
681 {
|
682 kumpf 1.18 // Process character or entity reference
|
683 mike 1.13
|
684 kumpf 1.18 Uint16 referenceChar = 0;
685 Uint32 referenceLength = 0;
686 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
687
688 if (*(p+1) == '#')
689 {
690 // Found a character (numeric) reference
691 // Determine whether it is decimal or hex
692 if (*(p+2) == 'x')
693 {
694 // Decode a hexadecimal character reference
695 char* q = p+3;
696
697 // At most four digits are allowed, plus trailing ';'
698 Uint32 numDigits;
699 for (numDigits = 0; numDigits < 5; numDigits++, q++)
700 {
701 if (isdigit(*q))
702 {
703 referenceChar = (referenceChar << 4);
704 referenceChar += (*q - '0');
705 kumpf 1.18 }
706 else if ((*q >= 'A') && (*q <= 'F'))
707 {
708 referenceChar = (referenceChar << 4);
709 referenceChar += (*q - 'A' + 10);
710 }
711 else if ((*q >= 'a') && (*q <= 'f'))
712 {
713 referenceChar = (referenceChar << 4);
714 referenceChar += (*q - 'a' + 10);
715 }
716 else if (*q == ';')
717 {
718 break;
719 }
720 else
721 {
722 throw XmlException(code, _line);
723 }
724 }
725
726 kumpf 1.18 // Hex number must be 1 - 4 digits
727 if ((numDigits == 0) || (numDigits > 4))
728 {
729 throw XmlException(code, _line);
730 }
731
732 // ATTN: Currently do not support 16-bit characters
733 if (referenceChar > 0xff)
734 {
735 // ATTN: Is there a good way to say "unsupported"?
736 throw XmlException(code, _line);
737 }
738
739 referenceLength = numDigits + 4;
740 }
741 else
742 {
743 // Decode a decimal character reference
744 Uint32 newChar = 0;
745 char* q = p+2;
746
747 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
748 Uint32 numDigits;
749 for (numDigits = 0; numDigits < 6; numDigits++, q++)
750 {
751 if (isdigit(*q))
752 {
753 newChar = (newChar * 10);
754 newChar += (*q - '0');
755 }
756 else if (*q == ';')
757 {
758 break;
759 }
760 else
761 {
762 throw XmlException(code, _line);
763 }
764 }
765
766 // Decimal number must be 1 - 5 digits and fit in 16 bits
767 if ((numDigits == 0) || (numDigits > 5) ||
768 kumpf 1.18 (newChar > 0xffff))
769 {
770 throw XmlException(code, _line);
771 }
772
773 // ATTN: Currently do not support 16-bit characters
774 if (newChar > 0xff)
775 {
776 // ATTN: Is there a good way to say "unsupported"?
777 throw XmlException(code, _line);
778 }
779
780 referenceChar = Uint16(newChar);
781 referenceLength = numDigits + 3;
782 }
783 }
784 else
785 {
786 // Check for entity reference
787 // ATTN: Inefficient if many entity references are supported
788 Uint32 i;
789 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
790 {
791 Uint32 length = _references[i].length;
792 const char* match = _references[i].match;
793
794 if (strncmp(p, _references[i].match, length) == 0)
795 {
796 referenceChar = _references[i].replacement;
797 referenceLength = length;
798 break;
799 }
800 }
801
802 if (i == _REFERENCES_SIZE)
803 {
804 // Didn't recognize the entity reference
805 // ATTN: Is there a good way to say "unsupported"?
806 throw XmlException(code, _line);
807 }
808 }
809
810 kumpf 1.18 // Replace the reference with the correct character
811 *p = (char)referenceChar;
812 char* q = p + referenceLength;
813 rem = rem - referenceLength + 1;
814 memmove(p + 1, q, rem);
|
815 mike 1.13 }
816 }
817 }
818
819 static const char _EMPTY_STRING[] = "";
820
821 void XmlParser::_getElement(char*& p, XmlEntry& entry)
822 {
823 entry.attributeCount = 0;
824
825 //--------------------------------------------------------------------------
826 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
827 //--------------------------------------------------------------------------
828
829 if (*p == '?')
830 {
831 entry.type = XmlEntry::XML_DECLARATION;
832 entry.text = ++p;
833
834 Boolean openCloseElement = false;
835
836 mike 1.13 if (_getElementName(p))
837 return;
838 }
839 else if (*p == '!')
840 {
841 p++;
842
843 // Expect a comment or CDATA:
844
845 if (p[0] == '-' && p[1] == '-')
846 {
847 p += 2;
848 entry.type = XmlEntry::COMMENT;
849 entry.text = p;
850 _getComment(p);
851 return;
852 }
853 else if (memcmp(p, "[CDATA[", 7) == 0)
854 {
855 p += 7;
856 entry.type = XmlEntry::CDATA;
857 mike 1.13 entry.text = p;
858 _getCData(p);
859 return;
860 }
861 else if (memcmp(p, "DOCTYPE", 7) == 0)
862 {
863 entry.type = XmlEntry::DOCTYPE;
864 entry.text = _EMPTY_STRING;
865 _getDocType(p);
866 return;
867 }
868 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
869 }
870 else if (*p == '/')
871 {
872 entry.type = XmlEntry::END_TAG;
873 entry.text = ++p;
874
875 if (!_getElementName(p))
876 throw(XmlException(XmlException::BAD_END_TAG, _line));
877
878 mike 1.13 return;
879 }
|
880 david 1.22 else if (String::isUTF8(p))
|
881 mike 1.13 {
882 entry.type = XmlEntry::START_TAG;
883 entry.text = p;
884
885 Boolean openCloseElement = false;
886
887 if (_getOpenElementName(p, openCloseElement))
888 {
889 if (openCloseElement)
890 entry.type = XmlEntry::EMPTY_TAG;
891 return;
892 }
893 }
894 else
895 throw XmlException(XmlException::BAD_START_TAG, _line);
896
897 //--------------------------------------------------------------------------
898 // Grab all the attributes:
899 //--------------------------------------------------------------------------
900
901 for (;;)
902 mike 1.13 {
903 if (entry.type == XmlEntry::XML_DECLARATION)
904 {
905 if (p[0] == '?' && p[1] == '>')
906 {
907 p += 2;
908 return;
909 }
910 }
911 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
912 {
913 entry.type = XmlEntry::EMPTY_TAG;
914 p += 2;
915 return;
916 }
917 else if (*p == '>')
918 {
919 p++;
920 return;
921 }
922
923 mike 1.13 XmlAttribute attr;
924 attr.name = p;
925 _getAttributeNameAndEqual(p);
926
927 if (*p != '"' && *p != '\'')
928 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
929
930 attr.value = p + 1;
931 _getAttributeValue(p);
932
933 if (entry.type == XmlEntry::XML_DECLARATION)
934 {
935 // The next thing must a space or a "?>":
936
937 if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
938 {
939 throw XmlException(
940 XmlException::BAD_ATTRIBUTE_VALUE, _line);
941 }
942 }
943 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
944 mike 1.13 {
945 // The next thing must be a space or a '>':
946
947 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
948 }
949
950 _skipWhitespace(p);
951
952 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
953 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
954
955 _substituteReferences((char*)attr.value);
956 entry.attributes[entry.attributeCount++] = attr;
957 }
958 }
959
960 static const char* _typeStrings[] =
961 {
962 "XML_DECLARATION",
963 "START_TAG",
964 "EMPTY_TAG",
965 mike 1.13 "END_TAG",
966 "COMMENT",
967 "CDATA",
968 "DOCTYPE",
969 "CONTENT"
970 };
971
972 void XmlEntry::print() const
973 {
974 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
975
976 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
977
978 if (needQuotes)
979 PEGASUS_STD(cout) << "\"";
980
981 _printValue(text);
982
983 if (needQuotes)
984 PEGASUS_STD(cout) << "\"";
985
986 mike 1.13 PEGASUS_STD(cout) << '\n';
987
988 for (Uint32 i = 0; i < attributeCount; i++)
989 {
990 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
991 _printValue(attributes[i].value);
992 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
993 }
994 }
995
996 const XmlAttribute* XmlEntry::findAttribute(
997 const char* name) const
998 {
999 for (Uint32 i = 0; i < attributeCount; i++)
1000 {
1001 if (strcmp(attributes[i].name, name) == 0)
1002 return &attributes[i];
1003 }
1004
1005 return 0;
1006 }
1007 mike 1.13
// Find the first non-whitespace character (set first) and the last
// non-whitespace character (set last one past this). For example, consider
// this string:
//
//     "   87   "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. When the string is empty or contains only whitespace, first
// and last both point at the null terminator.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so each character is converted before it is classified.

    first = str;

    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        last = first;
        return;
    }

    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1037
1038 Boolean XmlEntry::getAttributeValue(
1039 const char* name,
1040 Uint32& value) const
1041 {
1042 const XmlAttribute* attr = findAttribute(name);
1043
1044 if (!attr)
1045 return false;
1046
1047 const char* first;
1048 const char* last;
1049 mike 1.13 _findEnds(attr->value, first, last);
1050
1051 char* end = 0;
1052 long tmp = strtol(first, &end, 10);
1053
1054 if (!end || end != last)
1055 return false;
1056
1057 value = Uint32(tmp);
1058 return true;
1059 }
1060
1061 Boolean XmlEntry::getAttributeValue(
1062 const char* name,
1063 Real32& value) const
1064 {
1065 const XmlAttribute* attr = findAttribute(name);
1066
1067 if (!attr)
1068 return false;
1069
1070 mike 1.13 const char* first;
1071 const char* last;
1072 _findEnds(attr->value, first, last);
1073
1074 char* end = 0;
1075 double tmp = strtod(first, &end);
1076
1077 if (!end || end != last)
1078 return false;
1079
1080 value = Uint32(tmp);
1081 return true;
1082 }
1083
1084 Boolean XmlEntry::getAttributeValue(
1085 const char* name,
1086 const char*& value) const
1087 {
1088 const XmlAttribute* attr = findAttribute(name);
1089
1090 if (!attr)
1091 mike 1.13 return false;
1092
1093 value = attr->value;
1094 return true;
1095 }
1096
1097 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1098 {
1099 const char* tmp;
1100
1101 if (!getAttributeValue(name, tmp))
1102 return false;
1103
|
1104 david 1.22 value = String(tmp,STRING_FLAG_UTF8);
|
1105 mike 1.13 return true;
1106 }
1107
1108 void XmlAppendCString(Array<Sint8>& out, const char* str)
1109 {
1110 out.append(str, strlen(str));
1111 }
1112
1113 PEGASUS_NAMESPACE_END
|