1 mike 1.13 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.16 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.13 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.16 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.13 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.16 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.13 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.13 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
26 // Modified By:
27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30 ////////////////////////////////////////////////////////////////////////////////
31 //
32 // XmlParser
33 //
34 // This file contains a simple non-validating XML parser. Here are
35 // several rules for well-formed XML:
36 //
37 // 1. Documents must begin with an XML declaration:
38 //
39 // <?xml version="1.0" standalone="yes"?>
40 mike 1.13 //
41 // 2. Comments have the form:
42 //
43 // <!-- blah blah blah -->
44 //
45 // 3. The following entity references are supported:
46 //
47 // &amp; - ampersand
48 // &lt; - less-than
49 // &gt; - greater-than
50 // &quot; - full quote
51 // &apos; - apostrophe
52 //
|
53 kumpf 1.18 // as well as character (numeric) references:
54
55 // &#49; - decimal reference for character '1'
56 // &#x31; - hexadecimal reference for character '1'
57 //
|
58 mike 1.13 // 4. Element names and attribute names take the following form:
59 //
60 // [A-Za-z_][A-Za-z_0-9-.:]*
61 //
62 // 5. Arbitrary data (CDATA) can be enclosed like this:
63 //
64 // <![CDATA[
65 // ...
66 // ]]>
67 //
68 // 6. Element names and attributes names are case-sensitive.
69 //
70 // 7. Attribute values must be delimited by full (") or half (') quotes.
71 // Attribute values without delimiters are rejected.
72 //
73 // 8. <!DOCTYPE...>
74 //
75 // TODO:
76 //
|
77 karl 1.15 // ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is work.
|
78 mike 1.13 // Handle <!DOCTYPE...> sections which are complicated (containing
79 // rules rather than references to files).
80 //
81 // Remove newlines from string literals:
82 //
83 // Example: <xyz x="hello
84 // world">
85 //
86 ////////////////////////////////////////////////////////////////////////////////
87
|
88 sage 1.14 #include <Pegasus/Common/Config.h>
|
89 mike 1.13 #include <cctype>
90 #include <cstdio>
91 #include <cstdlib>
92 #include <cstring>
93 #include "XmlParser.h"
94 #include "Logger.h"
|
95 chuck 1.19 #include "ExceptionRep.h"
|
96 mike 1.13
97 PEGASUS_NAMESPACE_BEGIN
98
99 #define PEGASUS_ARRAY_T XmlEntry
100 # include "ArrayImpl.h"
101 #undef PEGASUS_ARRAY_T
102
103
104 ////////////////////////////////////////////////////////////////////////////////
105 //
106 // Static helper functions
107 //
108 ////////////////////////////////////////////////////////////////////////////////
109
110 static void _printValue(const char* p)
111 {
112 for (; *p; p++)
113 {
114 if (*p == '\n')
115 PEGASUS_STD(cout) << "\\n";
116 else if (*p == '\r')
117 mike 1.13 PEGASUS_STD(cout) << "\\r";
118 else if (*p == '\t')
119 PEGASUS_STD(cout) << "\\t";
120 else
121 PEGASUS_STD(cout) << *p;
122 }
123 }
124
125 struct EntityReference
126 {
127 const char* match;
128 Uint32 length;
129 char replacement;
130 };
131
|
132 kumpf 1.18 // ATTN: Add support for more entity references
|
133 mike 1.13 static EntityReference _references[] =
134 {
135 { "&", 5, '&' },
136 { "<", 4, '<' },
137 { ">", 4, '>' },
138 { """, 6, '"' },
139 { "'", 6, '\'' }
140 };
141
142 static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));
143
// Remove all redundant whitespace from the given null-terminated string,
// in place:
//
//   - leading whitespace is removed;
//   - trailing whitespace is removed;
//   - every interior run of two or more whitespace characters is replaced
//     by a single space character.
//
// A single interior whitespace character is left exactly as it is. The
// string can only get shorter, so no extra storage is required.

static void _normalize(char* text)
{
    // Note: isspace() is only defined for values representable as unsigned
    // char (and EOF), so each character is converted before the call. A plain
    // char holding a byte >= 0x80 may otherwise be passed as a negative int.

    char* p = text;

    // Remove leading spaces:

    while (isspace(static_cast<unsigned char>(*p)))
        p++;

    // Copy the remaining characters plus the terminator to the front:

    if (p != text)
        memmove(text, p, strlen(p) + 1);

    p = text;

    // Look for sequences of more than one space and remove all but one.

    for (;;)
    {
        // Advance to the next space:

        while (*p && !isspace(static_cast<unsigned char>(*p)))
            p++;

        if (!*p)
            break;

        // Remember where the run starts and advance to the next non-space:

        char* q = p++;

        while (isspace(static_cast<unsigned char>(*p)))
            p++;

        // Discard trailing spaces (if we are at the end):

        if (!*p)
        {
            *q = '\0';
            break;
        }

        // Replace a run of two or more whitespace characters by one space.
        // Only the live tail (through its terminator) is moved.

        if (p - q > 1)
        {
            *q++ = ' ';
            memmove(q, p, strlen(p) + 1);
            p = q;
        }
    }
}
201
202 ////////////////////////////////////////////////////////////////////////////////
203 //
204 // XmlException
205 //
206 ////////////////////////////////////////////////////////////////////////////////
207
// Default (untranslated) message text for each XmlException code. The
// message for a code is stored at index (code - 1); the entries are in the
// same order as the message keys in _xmlKeys.
static const char* _xmlMessages[] =
{
    "Bad opening element",
    "Bad closing element",
    "Bad attribute name",
    "Expected equal sign",
    "Bad attribute value",
    "A \"--\" sequence found within comment",
    "Unterminated comment",
    "Unterminated CDATA block",
    "Unterminated DOCTYPE",
    "Too many attributes: parser only handles 10",
    "Malformed reference",
    "Expected a comment or CDATA following \"<!\" sequence",
    "Closing element does not match opening element",
    "One or more tags are still open",
    "More than one root element was encountered",
    "Validation error",
    "Semantic error"
};
228
|
// Message catalog key for each XmlException code. The key for a code is
// stored at index (code - 1), parallel to the default texts in _xmlMessages.
static const char* _xmlKeys[] =
{
    "Common.XmlParser.BAD_START_TAG",               //  1
    "Common.XmlParser.BAD_END_TAG",                 //  2
    "Common.XmlParser.BAD_ATTRIBUTE_NAME",          //  3
    "Common.XmlParser.EXPECTED_EQUAL_SIGN",         //  4
    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",         //  5
    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",      //  6
    "Common.XmlParser.UNTERMINATED_COMMENT",        //  7
    "Common.XmlParser.UNTERMINATED_CDATA",          //  8
    "Common.XmlParser.UNTERMINATED_DOCTYPE",        //  9
    "Common.XmlParser.TOO_MANY_ATTRIBUTES",         // 10
    "Common.XmlParser.MALFORMED_REFERENCE",         // 11
    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",   // 12
    "Common.XmlParser.START_END_MISMATCH",          // 13
    "Common.XmlParser.UNCLOSED_TAGS",               // 14
    "Common.XmlParser.MULTIPLE_ROOTS",              // 15
    "Common.XmlParser.VALIDATION_ERROR",            // 16
    "Common.XmlParser.SEMANTIC_ERROR"               // 17
};
249
|
250 chuck 1.23 // l10n replace _formMessage (comment out the old one)
|
251 chuck 1.19 /*
|
252 mike 1.13 static String _formMessage(Uint32 code, Uint32 line, const String& message)
253 {
254 String result = _xmlMessages[Uint32(code) - 1];
255
256 char buffer[32];
257 sprintf(buffer, "%d", line);
258 result.append(": on line ");
259 result.append(buffer);
260
261 if (message.size())
262 {
263 result.append(": ");
264 result.append(message);
265 }
266
267 return result;
268 }
|
269 chuck 1.19 */
270
271 static MessageLoaderParms _formMessage(Uint32 code, Uint32 line, const String& message)
272 {
273 String dftMsg = _xmlMessages[Uint32(code) - 1];
274 String key = _xmlKeys[Uint32(code) - 1];
275 String msg = message;
276
277 dftMsg.append(": on line $0");
278 if (message.size())
279 {
|
280 humberto 1.20 msg = ": " + msg;
|
281 chuck 1.19 dftMsg.append("$1");
282 }
283
284 return MessageLoaderParms(key, dftMsg, line ,msg);
285 }
286
287 static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line)
288 {
289 String dftMsg = _xmlMessages[Uint32(code) - 1];
290 String key = _xmlKeys[Uint32(code) - 1];
291
292 dftMsg.append(": on line $0");
293
294 return MessageLoaderParms(key, dftMsg, line);
295 }
296
|
297 mike 1.13
298 XmlException::XmlException(
299 XmlException::Code code,
300 Uint32 lineNumber,
301 const String& message)
302 : Exception(_formMessage(code, lineNumber, message))
303 {
304
305 }
306
|
307 chuck 1.19
308 XmlException::XmlException(
309 XmlException::Code code,
310 Uint32 lineNumber,
311 MessageLoaderParms& msgParms)
312 : Exception(_formPartialMessage(code, lineNumber))
313 {
|
314 humberto 1.21 if (msgParms.default_msg.size())
315 {
316 msgParms.default_msg = ": " + msgParms.default_msg;
317 }
|
318 chuck 1.19 _rep->message.append(MessageLoader::getMessage(msgParms));
319 }
320
321
|
322 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
323 //
324 // XmlValidationError
325 //
326 ////////////////////////////////////////////////////////////////////////////////
327
328 XmlValidationError::XmlValidationError(
329 Uint32 lineNumber,
330 const String& message)
331 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message)
332 {
333
334 }
335
|
336 chuck 1.19
337 XmlValidationError::XmlValidationError(
338 Uint32 lineNumber,
339 MessageLoaderParms& msgParms)
340 : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms)
341 {
342
343 }
344
345
|
346 mike 1.13 ////////////////////////////////////////////////////////////////////////////////
347 //
348 // XmlSemanticError
349 //
350 ////////////////////////////////////////////////////////////////////////////////
351
352 XmlSemanticError::XmlSemanticError(
353 Uint32 lineNumber,
354 const String& message)
355 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message)
356 {
357
358 }
|
359 chuck 1.19
360
361 XmlSemanticError::XmlSemanticError(
362 Uint32 lineNumber,
363 MessageLoaderParms& msgParms)
364 : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms)
365 {
366
367 }
368
|
369 mike 1.13
370 ////////////////////////////////////////////////////////////////////////////////
371 //
372 // XmlParser
373 //
374 ////////////////////////////////////////////////////////////////////////////////
375
376 XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text),
377 _restoreChar('\0'), _foundRoot(false)
378 {
379
380 }
381
382 Boolean XmlParser::next(XmlEntry& entry)
383 {
384 if (!_putBackStack.isEmpty())
385 {
386 entry = _putBackStack.top();
387 _putBackStack.pop();
388 return true;
389 }
390 mike 1.13
391 // If a character was overwritten with a null-terminator the last
392 // time this routine was called, then put back that character. Before
393 // exiting of course, restore the null-terminator.
394
395 char* nullTerminator = 0;
396
397 if (_restoreChar && !*_current)
398 {
399 nullTerminator = _current;
400 *_current = _restoreChar;
401 _restoreChar = '\0';
402 }
403
404 // Skip over any whitespace:
405
406 _skipWhitespace(_current);
407
408 if (!*_current)
409 {
410 if (nullTerminator)
411 mike 1.13 *nullTerminator = '\0';
412
413 if (!_stack.isEmpty())
414 throw XmlException(XmlException::UNCLOSED_TAGS, _line);
415
416 return false;
417 }
418
419 // Either a "<...>" or content begins next:
420
421 if (*_current == '<')
422 {
423 _current++;
424 _getElement(_current, entry);
425
426 if (nullTerminator)
427 *nullTerminator = '\0';
428
429 if (entry.type == XmlEntry::START_TAG)
430 {
431 if (_stack.isEmpty() && _foundRoot)
432 mike 1.13 throw XmlException(XmlException::MULTIPLE_ROOTS, _line);
433
434 _foundRoot = true;
435 _stack.push((char*)entry.text);
436 }
437 else if (entry.type == XmlEntry::END_TAG)
438 {
439 if (_stack.isEmpty())
440 throw XmlException(XmlException::START_END_MISMATCH, _line);
441
442 if (strcmp(_stack.top(), entry.text) != 0)
443 throw XmlException(XmlException::START_END_MISMATCH, _line);
444
445 _stack.pop();
446 }
447
448 return true;
449 }
450 else
451 {
452 entry.type = XmlEntry::CONTENT;
453 mike 1.13 entry.text = _current;
454 _getContent(_current);
455 _restoreChar = *_current;
456 *_current = '\0';
457
458 if (nullTerminator)
459 *nullTerminator = '\0';
460
461 _substituteReferences((char*)entry.text);
462 _normalize((char*)entry.text);
463
464 return true;
465 }
466 }
467
468 void XmlParser::putBack(XmlEntry& entry)
469 {
470 _putBackStack.push(entry);
471 }
472
473 XmlParser::~XmlParser()
474 mike 1.13 {
475 // Nothing to do!
476 }
477
478 void XmlParser::_skipWhitespace(char*& p)
479 {
480 while (*p && isspace(*p))
481 {
482 if (*p == '\n')
483 _line++;
484
485 p++;
486 }
487 }
488
489 Boolean XmlParser::_getElementName(char*& p)
490 {
|
491 kumpf 1.24 if (!String::isUTF8(p) ||
492 !(((*p >= 'A') && (*p <= 'Z')) ||
493 ((*p >= 'a') && (*p <= 'z')) ||
494 (*p == '_')))
|
495 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
496 kumpf 1.24 p++;
|
497 mike 1.13
|
498 david 1.22 while ((*p) &&
499 (((*p >= 'A') && (*p <= 'Z')) ||
500 ((*p >= 'a') && (*p <= 'z')) ||
|
501 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
502 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
503 mike 1.13 p++;
504
505 // The next character must be a space:
506
507 if (isspace(*p))
508 {
509 *p++ = '\0';
510 _skipWhitespace(p);
511 }
512
513 if (*p == '>')
514 {
515 *p++ = '\0';
516 return true;
517 }
518
519 return false;
520 }
521
522 Boolean XmlParser::_getOpenElementName(char*& p, Boolean& openCloseElement)
523 {
524 mike 1.13 openCloseElement = false;
525
|
526 kumpf 1.24 if (!String::isUTF8(p) ||
527 !(((*p >= 'A') && (*p <= 'Z')) ||
528 ((*p >= 'a') && (*p <= 'z')) ||
529 (*p == '_')))
|
530 mike 1.13 throw XmlException(XmlException::BAD_START_TAG, _line);
|
531 kumpf 1.24 p++;
|
532 mike 1.13
|
533 david 1.22 while ((*p) &&
534 (((*p >= 'A') && (*p <= 'Z')) ||
535 ((*p >= 'a') && (*p <= 'z')) ||
|
536 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
537 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
538 mike 1.13 p++;
539
540 // The next character must be a space:
541
542 if (isspace(*p))
543 {
544 *p++ = '\0';
545 _skipWhitespace(p);
546 }
547
548 if (*p == '>')
549 {
550 *p++ = '\0';
551 return true;
552 }
553
554 if (p[0] == '/' && p[1] == '>')
555 {
556 openCloseElement = true;
557 *p = '\0';
558 p += 2;
559 mike 1.13 return true;
560 }
561
562 return false;
563 }
564
565 void XmlParser::_getAttributeNameAndEqual(char*& p)
566 {
|
567 kumpf 1.24 if (!String::isUTF8(p) ||
568 !(((*p >= 'A') && (*p <= 'Z')) ||
569 ((*p >= 'a') && (*p <= 'z')) ||
570 (*p == '_')))
|
571 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
|
572 kumpf 1.24 p++;
|
573 mike 1.13
|
574 david 1.22 while ((*p) &&
575 (((*p >= 'A') && (*p <= 'Z')) ||
576 ((*p >= 'a') && (*p <= 'z')) ||
|
577 kumpf 1.24 ((*p >= '0') && (*p <= '9')) ||
|
578 david 1.22 *p == '_' || *p == '-' || *p == ':' || *p == '.'))
|
579 mike 1.13 p++;
580
581 char* term = p;
582
583 _skipWhitespace(p);
584
585 if (*p != '=')
586 throw XmlException(XmlException::BAD_ATTRIBUTE_NAME, _line);
587
588 p++;
589
590 _skipWhitespace(p);
591
592 *term = '\0';
593 }
594
595 void XmlParser::_getAttributeValue(char*& p)
596 {
597 // ATTN-B: handle values contained in semiquotes:
598
599 if (*p != '"' && *p != '\'')
600 mike 1.13 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
601
602 char startChar = *p++;
603
604 while (*p && *p != startChar)
605 p++;
606
607 if (*p != startChar)
608 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
609
610 *p++ = '\0';
611 }
612
613 void XmlParser::_getComment(char*& p)
614 {
615 // Now p points to first non-whitespace character beyond "<--" sequence:
616
617 for (; *p; p++)
618 {
619 if (p[0] == '-' && p[1] == '-')
620 {
621 mike 1.13 if (p[2] != '>')
622 {
623 throw XmlException(
624 XmlException::MINUS_MINUS_IN_COMMENT, _line);
625 }
626
627 // Find end of comment (excluding whitespace):
628
629 *p = '\0';
630 p += 3;
631 return;
632 }
633 }
634
635 // If it got this far, then the comment is unterminated:
636
637 throw XmlException(XmlException::UNTERMINATED_COMMENT, _line);
638 }
639
640 void XmlParser::_getCData(char*& p)
641 {
642 mike 1.13 // At this point p points one past "<![CDATA[" sequence:
643
644 for (; *p; p++)
645 {
646 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
647 {
648 *p = '\0';
649 p += 3;
650 return;
651 }
652 else if (*p == '\n')
653 _line++;
654 }
655
656 // If it got this far, then the comment is unterminated:
657
658 throw XmlException(XmlException::UNTERMINATED_CDATA, _line);
659 }
660
661 void XmlParser::_getDocType(char*& p)
662 {
663 mike 1.13 // Just ignore the DOCTYPE command for now:
664
665 for (; *p && *p != '>'; p++)
666 {
667 if (*p == '\n')
668 _line++;
669 }
670
671 if (*p != '>')
672 throw XmlException(XmlException::UNTERMINATED_DOCTYPE, _line);
673
674 p++;
675 }
676
677 void XmlParser::_getContent(char*& p)
678 {
679 while (*p && *p != '<')
680 {
681 if (*p == '\n')
682 _line++;
683
684 mike 1.13 p++;
685 }
686 }
687
688 void XmlParser::_substituteReferences(char* text)
689 {
690 Uint32 rem = strlen(text);
691
692 for (char* p = text; *p; p++, rem--)
693 {
694 if (*p == '&')
695 {
|
696 kumpf 1.18 // Process character or entity reference
|
697 mike 1.13
|
698 kumpf 1.18 Uint16 referenceChar = 0;
699 Uint32 referenceLength = 0;
700 XmlException::Code code = XmlException::MALFORMED_REFERENCE;
701
702 if (*(p+1) == '#')
703 {
704 // Found a character (numeric) reference
705 // Determine whether it is decimal or hex
706 if (*(p+2) == 'x')
707 {
708 // Decode a hexadecimal character reference
709 char* q = p+3;
710
711 // At most four digits are allowed, plus trailing ';'
712 Uint32 numDigits;
713 for (numDigits = 0; numDigits < 5; numDigits++, q++)
714 {
715 if (isdigit(*q))
716 {
717 referenceChar = (referenceChar << 4);
718 referenceChar += (*q - '0');
719 kumpf 1.18 }
720 else if ((*q >= 'A') && (*q <= 'F'))
721 {
722 referenceChar = (referenceChar << 4);
723 referenceChar += (*q - 'A' + 10);
724 }
725 else if ((*q >= 'a') && (*q <= 'f'))
726 {
727 referenceChar = (referenceChar << 4);
728 referenceChar += (*q - 'a' + 10);
729 }
730 else if (*q == ';')
731 {
732 break;
733 }
734 else
735 {
736 throw XmlException(code, _line);
737 }
738 }
739
740 kumpf 1.18 // Hex number must be 1 - 4 digits
741 if ((numDigits == 0) || (numDigits > 4))
742 {
743 throw XmlException(code, _line);
744 }
745
746 // ATTN: Currently do not support 16-bit characters
747 if (referenceChar > 0xff)
748 {
749 // ATTN: Is there a good way to say "unsupported"?
750 throw XmlException(code, _line);
751 }
752
753 referenceLength = numDigits + 4;
754 }
755 else
756 {
757 // Decode a decimal character reference
758 Uint32 newChar = 0;
759 char* q = p+2;
760
761 kumpf 1.18 // At most five digits are allowed, plus trailing ';'
762 Uint32 numDigits;
763 for (numDigits = 0; numDigits < 6; numDigits++, q++)
764 {
765 if (isdigit(*q))
766 {
767 newChar = (newChar * 10);
768 newChar += (*q - '0');
769 }
770 else if (*q == ';')
771 {
772 break;
773 }
774 else
775 {
776 throw XmlException(code, _line);
777 }
778 }
779
780 // Decimal number must be 1 - 5 digits and fit in 16 bits
781 if ((numDigits == 0) || (numDigits > 5) ||
782 kumpf 1.18 (newChar > 0xffff))
783 {
784 throw XmlException(code, _line);
785 }
786
787 // ATTN: Currently do not support 16-bit characters
788 if (newChar > 0xff)
789 {
790 // ATTN: Is there a good way to say "unsupported"?
791 throw XmlException(code, _line);
792 }
793
794 referenceChar = Uint16(newChar);
795 referenceLength = numDigits + 3;
796 }
797 }
798 else
799 {
800 // Check for entity reference
801 // ATTN: Inefficient if many entity references are supported
802 Uint32 i;
803 kumpf 1.18 for (i = 0; i < _REFERENCES_SIZE; i++)
804 {
805 Uint32 length = _references[i].length;
806 const char* match = _references[i].match;
807
808 if (strncmp(p, _references[i].match, length) == 0)
809 {
810 referenceChar = _references[i].replacement;
811 referenceLength = length;
812 break;
813 }
814 }
815
816 if (i == _REFERENCES_SIZE)
817 {
818 // Didn't recognize the entity reference
819 // ATTN: Is there a good way to say "unsupported"?
820 throw XmlException(code, _line);
821 }
822 }
823
824 kumpf 1.18 // Replace the reference with the correct character
825 *p = (char)referenceChar;
826 char* q = p + referenceLength;
827 rem = rem - referenceLength + 1;
828 memmove(p + 1, q, rem);
|
829 mike 1.13 }
830 }
831 }
832
833 static const char _EMPTY_STRING[] = "";
834
835 void XmlParser::_getElement(char*& p, XmlEntry& entry)
836 {
837 entry.attributeCount = 0;
838
839 //--------------------------------------------------------------------------
840 // Get the element name (expect one of these: '?', '!', [A-Za-z_])
841 //--------------------------------------------------------------------------
842
843 if (*p == '?')
844 {
845 entry.type = XmlEntry::XML_DECLARATION;
846 entry.text = ++p;
847
848 Boolean openCloseElement = false;
849
850 mike 1.13 if (_getElementName(p))
851 return;
852 }
853 else if (*p == '!')
854 {
855 p++;
856
857 // Expect a comment or CDATA:
858
859 if (p[0] == '-' && p[1] == '-')
860 {
861 p += 2;
862 entry.type = XmlEntry::COMMENT;
863 entry.text = p;
864 _getComment(p);
865 return;
866 }
867 else if (memcmp(p, "[CDATA[", 7) == 0)
868 {
869 p += 7;
870 entry.type = XmlEntry::CDATA;
871 mike 1.13 entry.text = p;
872 _getCData(p);
873 return;
874 }
875 else if (memcmp(p, "DOCTYPE", 7) == 0)
876 {
877 entry.type = XmlEntry::DOCTYPE;
878 entry.text = _EMPTY_STRING;
879 _getDocType(p);
880 return;
881 }
882 throw(XmlException(XmlException::EXPECTED_COMMENT_OR_CDATA, _line));
883 }
884 else if (*p == '/')
885 {
886 entry.type = XmlEntry::END_TAG;
887 entry.text = ++p;
888
889 if (!_getElementName(p))
890 throw(XmlException(XmlException::BAD_END_TAG, _line));
891
892 mike 1.13 return;
893 }
|
894 kumpf 1.24 else if (String::isUTF8(p) &&
895 (((*p >= 'A') && (*p <= 'Z')) ||
896 ((*p >= 'a') && (*p <= 'z')) ||
897 (*p == '_')))
|
898 mike 1.13 {
899 entry.type = XmlEntry::START_TAG;
900 entry.text = p;
901
902 Boolean openCloseElement = false;
903
904 if (_getOpenElementName(p, openCloseElement))
905 {
906 if (openCloseElement)
907 entry.type = XmlEntry::EMPTY_TAG;
908 return;
909 }
910 }
911 else
912 throw XmlException(XmlException::BAD_START_TAG, _line);
913
914 //--------------------------------------------------------------------------
915 // Grab all the attributes:
916 //--------------------------------------------------------------------------
917
918 for (;;)
919 mike 1.13 {
920 if (entry.type == XmlEntry::XML_DECLARATION)
921 {
922 if (p[0] == '?' && p[1] == '>')
923 {
924 p += 2;
925 return;
926 }
927 }
928 else if (entry.type == XmlEntry::START_TAG && p[0] == '/' && p[1] =='>')
929 {
930 entry.type = XmlEntry::EMPTY_TAG;
931 p += 2;
932 return;
933 }
934 else if (*p == '>')
935 {
936 p++;
937 return;
938 }
939
940 mike 1.13 XmlAttribute attr;
941 attr.name = p;
942 _getAttributeNameAndEqual(p);
943
944 if (*p != '"' && *p != '\'')
945 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
946
947 attr.value = p + 1;
948 _getAttributeValue(p);
949
950 if (entry.type == XmlEntry::XML_DECLARATION)
951 {
952 // The next thing must a space or a "?>":
953
954 if (!(p[0] == '?' && p[1] == '>') && !isspace(*p))
955 {
956 throw XmlException(
957 XmlException::BAD_ATTRIBUTE_VALUE, _line);
958 }
959 }
960 else if (!(*p == '>' || (p[0] == '/' && p[1] == '>') || isspace(*p)))
961 mike 1.13 {
962 // The next thing must be a space or a '>':
963
964 throw XmlException(XmlException::BAD_ATTRIBUTE_VALUE, _line);
965 }
966
967 _skipWhitespace(p);
968
969 if (entry.attributeCount == XmlEntry::MAX_ATTRIBUTES)
970 throw XmlException(XmlException::TOO_MANY_ATTRIBUTES, _line);
971
972 _substituteReferences((char*)attr.value);
973 entry.attributes[entry.attributeCount++] = attr;
974 }
975 }
976
// Printable names of the XmlEntry types, indexed by the entry's type value
// (see XmlEntry::print()).
static const char* _typeStrings[] =
{
    "XML_DECLARATION",  // 0
    "START_TAG",        // 1
    "EMPTY_TAG",        // 2
    "END_TAG",          // 3
    "COMMENT",          // 4
    "CDATA",            // 5
    "DOCTYPE",          // 6
    "CONTENT"           // 7
};
988
989 void XmlEntry::print() const
990 {
991 PEGASUS_STD(cout) << "=== " << _typeStrings[type] << " ";
992
993 Boolean needQuotes = type == XmlEntry::CDATA || type == XmlEntry::CONTENT;
994
995 if (needQuotes)
996 PEGASUS_STD(cout) << "\"";
997
998 _printValue(text);
999
1000 if (needQuotes)
1001 PEGASUS_STD(cout) << "\"";
1002
1003 mike 1.13 PEGASUS_STD(cout) << '\n';
1004
1005 for (Uint32 i = 0; i < attributeCount; i++)
1006 {
1007 PEGASUS_STD(cout) << " " << attributes[i].name << "=\"";
1008 _printValue(attributes[i].value);
1009 PEGASUS_STD(cout) << "\"" << PEGASUS_STD(endl);
1010 }
1011 }
1012
1013 const XmlAttribute* XmlEntry::findAttribute(
1014 const char* name) const
1015 {
1016 for (Uint32 i = 0; i < attributeCount; i++)
1017 {
1018 if (strcmp(attributes[i].name, name) == 0)
1019 return &attributes[i];
1020 }
1021
1022 return 0;
1023 }
1024 mike 1.13
// Find first non-whitespace character (set first) and last non-whitespace
// character (set last one past this). For example, consider this string:
//
//     " 87 "
//
// The first pointer would point to '8' and the last pointer would point one
// beyond '7'. For an empty or all-whitespace string, first and last both
// point at the null terminator.

static void _findEnds(
    const char* str,
    const char*& first,
    const char*& last)
{
    // isspace() is only defined for values representable as unsigned char
    // (and EOF), so each character is converted before the call.

    first = str;

    while (isspace(static_cast<unsigned char>(*first)))
        first++;

    if (!*first)
    {
        last = first;
        return;
    }

    last = first + strlen(first);

    while (last != first && isspace(static_cast<unsigned char>(last[-1])))
        last--;
}
1054
1055 Boolean XmlEntry::getAttributeValue(
1056 const char* name,
1057 Uint32& value) const
1058 {
1059 const XmlAttribute* attr = findAttribute(name);
1060
1061 if (!attr)
1062 return false;
1063
1064 const char* first;
1065 const char* last;
1066 mike 1.13 _findEnds(attr->value, first, last);
1067
1068 char* end = 0;
1069 long tmp = strtol(first, &end, 10);
1070
1071 if (!end || end != last)
1072 return false;
1073
1074 value = Uint32(tmp);
1075 return true;
1076 }
1077
1078 Boolean XmlEntry::getAttributeValue(
1079 const char* name,
1080 Real32& value) const
1081 {
1082 const XmlAttribute* attr = findAttribute(name);
1083
1084 if (!attr)
1085 return false;
1086
1087 mike 1.13 const char* first;
1088 const char* last;
1089 _findEnds(attr->value, first, last);
1090
1091 char* end = 0;
1092 double tmp = strtod(first, &end);
1093
1094 if (!end || end != last)
1095 return false;
1096
1097 value = Uint32(tmp);
1098 return true;
1099 }
1100
1101 Boolean XmlEntry::getAttributeValue(
1102 const char* name,
1103 const char*& value) const
1104 {
1105 const XmlAttribute* attr = findAttribute(name);
1106
1107 if (!attr)
1108 mike 1.13 return false;
1109
1110 value = attr->value;
1111 return true;
1112 }
1113
1114 Boolean XmlEntry::getAttributeValue(const char* name, String& value) const
1115 {
1116 const char* tmp;
1117
1118 if (!getAttributeValue(name, tmp))
1119 return false;
1120
|
1121 david 1.22 value = String(tmp,STRING_FLAG_UTF8);
|
1122 mike 1.13 return true;
1123 }
1124
1125 void XmlAppendCString(Array<Sint8>& out, const char* str)
1126 {
1127 out.append(str, strlen(str));
1128 }
1129
1130 PEGASUS_NAMESPACE_END
|