1 krisbash 1.1 /*
2 **==============================================================================
3 **
4 ** Open Management Infrastructure (OMI)
5 **
6 ** Copyright (c) Microsoft Corporation
7 **
8 ** Licensed under the Apache License, Version 2.0 (the "License"); you may not
9 ** use this file except in compliance with the License. You may obtain a copy
10 ** of the License at
11 **
12 ** http://www.apache.org/licenses/LICENSE-2.0
13 **
14 ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 ** MERCHANTABLITY OR NON-INFRINGEMENT.
18 **
19 ** See the Apache 2 License for the specific language governing permissions
20 ** and limitations under the License.
21 **
22 krisbash 1.1 **==============================================================================
23 */
24
25 #if defined(_MSC_VER)
26 # include <windows.h>
27 #endif
28
29 #include "xml.h"
30 #include <string.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <ctype.h>
35 #ifdef _MSC_VER
36 #pragma prefast (disable: 28252)
37 #pragma prefast (disable: 28253)
38 #endif
39 #include <wchar.h>
40 #include <stdarg.h>
41
42 #if defined(__GNUC__) && (__GNUC__ >= 4)
43 krisbash 1.1 # define PRINTF_FORMAT(N,M) __attribute__((format(printf, N, M)))
44 #else
45 # define PRINTF_FORMAT(N,M) /* empty */
46 #endif
47
/*
 * Format into 'buf' from a va_list.
 *
 *   buf  - destination buffer
 *   size - capacity of 'buf' in characters, including the terminator
 *   fmt  - printf-style format string
 *   ap   - arguments for 'fmt'
 *
 * Returns the value of the underlying runtime call: vsnprintf() semantics on
 * non-Microsoft compilers; on MSVC the number of characters written, or -1
 * when the output had to be truncated.
 */
static int XML_vsnprintf(char* buf, size_t size, const char* fmt, va_list ap)
{
#if defined(_MSC_VER)
    /* Pass _TRUNCATE as the count. Passing 'size' asks the CRT to store up
     * to 'size' characters plus a terminator, which cannot fit in a buffer
     * of 'size' characters; over-long output then triggers the invalid
     * parameter handler instead of being truncated.
     */
    return _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap);
#else
    return vsnprintf(buf, size, fmt, ap);
#endif
}

/*
 * Variadic front end for XML_vsnprintf(); same parameters and return value.
 */
static int XML_snprintf(char* buf, size_t size, const char* fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = XML_vsnprintf(buf, size, fmt, ap);
    va_end(ap);

    return r;
}
72
73 #include <pal/strings.h>
74
75 #if defined(CONFIG_ENABLE_WCHAR)
76 # define T(STR) L##STR
77 # define XML_strtoul wcstoul
78 # define XML_strlen wcslen
79 # define XML_strcmp wcscmp
80 # define XML_printf wprintf
81 # define XML_fprintf fwprintf
82 #else
83 # define T(STR) STR
84 # define XML_strtoul strtoul
85 krisbash 1.1 # define XML_strlen strlen
86 # define XML_strcmp strcmp
87 # define XML_printf printf
88 # define XML_fprintf fprintf
89 #endif
90
91 // Windows uses these identifiers:
92 #if !defined(_MSC_VER)
93 # define ID_MIUTILS_UNKNOWN 0
94 # define ID_MIUTILS_XMLPARSER_BAD_ENTITY_REFERENCE 0
95 # define ID_MIUTILS_XMLPARSER_BAD_CHARACTER_REFERENCE 1
96 # define ID_MIUTILS_XMLPARSER_UNDEFINED_NAMESPACE_PREFIX 2
97 # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_NAME 3
98 # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_EQUALS 4
99 # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_OPENING_QUOTES 5
100 # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_CLOSING_QUOTES 6
101 # define ID_MIUTILS_XMLPARSER_TOO_MANY_NAMESPACES 7
102 # define ID_MIUTILS_XMLPARSER_TOO_MANY_ATTRIBUTES 8
103 # define ID_MIUTILS_XMLPARSER_END_OF_XML_INSTRUCTION 9
104 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED 10
105 # define ID_MIUTILS_XMLPARSER_END_OF_INSTRUCTION_MISSING 11
106 krisbash 1.1 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END 12
107 # define ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW 13
108 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED 14
109 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED_ELEM_END 15
110 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END_ELEM_END 16
111 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED_ELEM_END 17
112 # define ID_MIUTILS_XMLPARSER_ELEMENT_TOO_MANY_ENDS 18
113 # define ID_MIUTILS_XMLPARSER_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG 19
114 # define ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END 20
115 # define ID_MIUTILS_XMLPARSER_COMMENT_END_EXPECTED 21
116 # define ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END 22
117 # define ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END 23
118 # define ID_MIUTILS_XMLPARSER_CHARDATA_EXPECTED_ELEMENT_END_TAG 24
119 # define ID_MIUTILS_XMLPARSER_OPEN_ANGLE_BRACKET_EXPECTED 25
120 # define ID_MIUTILS_XMLPARSER_COMMENT_CDATA_DOCTYPE_EXPECTED 26
121 # define ID_MIUTILS_XMLPARSER_ELEMENT_EXPECTED 27
122 # define ID_MIUTILS_XMLPARSER_UNEXPECTED_STATE 28
123 #endif
124
125 PRINTF_FORMAT(3, 4)
126 void XML_Raise(
127 krisbash 1.1 XML* self,
128 unsigned formatStringId,
129 const Char* format,
130 ...);
131
132 //extern HMODULE g_hModule; /*From DllMain */
133
134 /*
135 **==============================================================================
136 **
137 ** Local definitions
138 **
139 **==============================================================================
140 */
141
/*
 * Parser state values.
 *
 * In the code visible here the tag parsers select STATE_START while no root
 * element has been seen yet and STATE_CHARS once one has.
 * NOTE(review): STATE_TAG is presumably used while positioned on a '<';
 * confirm against the state machine that consumes these values.
 */
typedef enum _XML_State
{
    STATE_START = 0,
    STATE_TAG = 1,
    STATE_CHARS = 2
}
XML_State;
149
150 INLINE int _IsSpace(Char c)
151 {
152 /* Space characters include [\n\t\r ]
153 * _spaceChar['\n'] => 1
154 * _spaceChar['\r'] => 2
155 * _spaceChar['\t'] => 2
156 * _spaceChar[' '] => 2
157 */
158 static const unsigned char _table[256] =
159 {
160 0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
161 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
162 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
163 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
164 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
165 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
166 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
168 };
169 krisbash 1.1
170 if (c < 256)
171 return _table[(unsigned int)c];
172 else
173 return 0;
174 }
175
/* Classification table for XML name characters, indexed by character code:
 *     2 => may start a name and may appear inside one: [A-Za-z_]
 *     1 => may only appear inside a name:               [0-9.-]
 *     0 => not a name character
 * Note that ':' is NOT marked here; the parsers handle the prefix separator
 * explicitly after scanning each half of a qualified name.
 */
static const unsigned char _nameCharTable[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
    /* 0x40 */ 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0x50 */ 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,2,
    /* 0x60 */ 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0x70 */ 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
191
192 INLINE int _IsFirst(Char c)
193 {
194 if (c < 256)
195 return _nameCharTable[(unsigned int)c] & 2;
196 else
197 return 0;
198 }
199
200 INLINE int _IsInner(Char c)
201 {
202 if (c < 256)
203 return _nameCharTable[(unsigned char)c];
204 else
205 return 0;
206 }
207
208 INLINE Char* _SkipInner(__in_z Char* p)
209 {
210 while (*p && _IsInner(*p))
211 krisbash 1.1 p++;
212
213 return p;
214 }
215
216 static Char* _SkipSpacesAux(__inout XML* self, __in_z Char* p)
217 {
218 Char x;
219 size_t n = 0;
220
221 while (*p && (x = (Char)_IsSpace(*p)) != 0)
222 {
223 n += 0x01 & x;
224 p++;
225 }
226
227 self->line += n;
228 return p;
229 }
230
231 INLINE Char* _SkipSpaces(__inout XML* self, __in_z Char* p)
232 krisbash 1.1 {
233 if ((p[0] == '\0') || !_IsSpace(p[0]))
234 return p;
235
236 if (p[0] == '\n')
237 self->line++;
238
239 if ((p[1] == '\0') || !_IsSpace(p[1]))
240 return &p[1];
241
242 if (p[1] == '\n')
243 self->line++;
244
245 if ((p[2] == '\0') || !_IsSpace(p[2]))
246 return &p[2];
247
248 if (p[2] == '\n')
249 self->line++;
250
251 return _SkipSpacesAux(self, &p[3]);
252 }
253 krisbash 1.1
254 INLINE Char* _ToEntityRef(
255 __inout XML* self,
256 __in_z Char* p,
257 __inout_z Char* ch)
258 {
259 /* Note: we collected the following statistics on the frequency of
260 * each entity reference in a large body of XML documents:
261 *
262 * " - 74,480 occurences
263 * ' - 13,877 occurences
264 * < - 9,919 occurences
265 * > - 9,853 occurences
266 * & - 111 occurences
267 *
268 * The cases below are organized in order of statistical frequency.
269 */
270
271 /* Match one of these: "lt;", "gt;", "amp;", "quot;", "apos;" */
272
273 if (p[0] == 'q' && p[1] == 'u' && p[2] == 'o' && p[3] == 't' && p[4] == ';')
274 krisbash 1.1 {
275 *ch = '"';
276 return p + 5;
277 }
278
279 if (p[0] == 'a' && p[1] == 'p' && p[2] == 'o' && p[3] == 's' && p[4] == ';')
280 {
281 *ch = '\'';
282 return p + 5;
283 }
284
285 if (p[0] == 'l' && p[1] == 't' && p[2] == ';')
286 {
287 *ch = '<';
288 return p + 3;
289 }
290
291 if (p[0] == 'g' && p[1] == 't' && p[2] == ';')
292 {
293 *ch = '>';
294 return p + 3;
295 krisbash 1.1 }
296
297 if (p[0] == 'a' && p[1] == 'm' && p[2] == 'p' && p[3] == ';')
298 {
299 *ch = '&';
300 return p + 4;
301 }
302
303 XML_Raise(
304 self,
305 ID_MIUTILS_XMLPARSER_BAD_ENTITY_REFERENCE,
306 "bad entity reference");
307 return p;
308 }
309
310 INLINE Char* _ToCharRef(__inout XML* self, __in_z Char* p, __inout_z Char* ch)
311 {
312 Char* end = NULL;
313 unsigned long x;
314
315 if (*p == 'x')
316 krisbash 1.1 {
317 p++;
318 x = XML_strtoul(p, &end, 16);
319 }
320 else
321 {
322 x = XML_strtoul(p, &end, 10);
323 }
324
325 if (end == p || *end != ';' || x > 255)
326 {
327 *ch = '\0';
328 XML_Raise(
329 self,
330 ID_MIUTILS_XMLPARSER_BAD_CHARACTER_REFERENCE,
331 "bad character reference");
332 return p;
333 }
334
335 *ch = (Char)x;
336
337 krisbash 1.1 return end + 1;
338 }
339
340 INLINE Char* _ToRef(__inout XML* self, __in_z Char* p, __inout_z Char* ch)
341 {
342 /* Examples:
343 * @
344 * &xFF;
345 * &
346 * <
347 */
348 if (*p == '#')
349 return _ToCharRef(self, p + 1, ch);
350 else
351 return _ToEntityRef(self, p, ch);
352 }
353
354
355 static int _Match1(Char c)
356 {
357 /* Matches all but '\0', '\'', '"', and '&'. All matching charcters
358 krisbash 1.1 * yeild 2, except for '\n', which yields 1
359 */
360 static const unsigned char _match[256] =
361 {
362 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
363 1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
364 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
365 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
366 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
367 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
368 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
369 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
370 };
371
372 if (c < 256)
373 return _match[(unsigned char)c];
374 else
375 return 1;
376 }
377
378 /* Reduce entity references and remove leading and trailing whitespace */
379 krisbash 1.1 static Char* _ReduceAttrValue(
380 __inout XML* self,
381 __deref_inout_z Char** pInOut,
382 Char eos)
383 {
384 Char* p = *pInOut;
385 Char* end;
386 size_t n = 0;
387
388 /* Skip uninteresting characters */
389 for (;;)
390 {
391 #if defined(_MSC_VER)
392 # pragma prefast(push)
393 # pragma prefast (disable: 26018)
394 #endif
395 while (*p && _Match1(*p))
396 #if defined(_MSC_VER)
397 # pragma prefast(pop)
398 #endif
399 p++;
400 krisbash 1.1
401 if (*p != '\n')
402 break;
403
404 self->line++;
405 p++;
406 }
407
408 end = p;
409
410 while (*p && *p != eos)
411 {
412 if (*p == '&')
413 {
414 Char c = '\0';
415 Char* tmp;
416
417 p++;
418 tmp = _ToRef(self, p, &c);
419
420 if (self->status)
421 krisbash 1.1 {
422 /* Propagate error */
423 return NULL;
424 }
425
426 *end++ = c;
427 p = tmp;
428 }
429 else
430 {
431 if (*p == '\n')
432 n++;
433
434 *end++ = *p++;
435 }
436 }
437
438 *pInOut = p;
439 self->line += n;
440
441 return end;
442 krisbash 1.1 }
443
444 static int _Match2(Char c)
445 {
446 /* Match all but these: '\0', '<', '&', '\n' */
447 static const unsigned char _match[256] =
448 {
449 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
450 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
451 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
452 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
453 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
454 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
455 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
456 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
457 };
458
459 if (c < 256)
460 return _match[(unsigned char)c];
461 else
462 return 1;
463 krisbash 1.1 }
464
465 /* Reduce character data, advance p, and return pointer to end */
466 static Char* _ReduceCharData(__inout XML* self, __deref_inout_z Char** pInOut)
467 {
468 Char* p = *pInOut;
469 Char* end;
470 size_t n = 0;
471
472 for (;;)
473 {
474 #if defined(_MSC_VER)
475 # pragma prefast(push)
476 # pragma prefast (disable: 26018)
477 #endif
478 while (*p && (_Match2(*p)))
479 #if defined(_MSC_VER)
480 # pragma prefast(pop)
481 #endif
482 p++;
483
484 krisbash 1.1 if (*p != '\n')
485 break;
486
487 p++;
488 self->line++;
489 }
490
491 end = p;
492
493 /* Can we return now? */
494 if (*p == '<')
495 {
496 *pInOut = p;
497 self->line += n;
498 return end;
499 }
500
501 /* Seek next tag start */
502 while (*p && *p != '<')
503 {
504 if (*p == '&')
505 krisbash 1.1 {
506 Char c = '\0';
507 Char* tmp;
508
509 p++;
510 tmp = _ToRef(self, p, &c);
511
512 if (tmp == p)
513 return NULL;
514
515 *end++ = c;
516 p = tmp;
517 }
518 else
519 {
520 for (;;)
521 {
522 while (_Match2(*p))
523 *end++ = *p++;
524
525 if (*p != '\n')
526 krisbash 1.1 break;
527
528 *end++ = *p++;
529 self->line++;
530 }
531 }
532 }
533
534 /* Document cannot end with character data */
535 if (*p == '\0')
536 return NULL;
537
538 *pInOut = p;
539 self->line += n;
540
541 return end;
542 }
543
544 INLINE unsigned int _HashCode(__in_ecount_z(n) const Char* s, size_t n)
545 {
546 /* This hash algorithm excludes the first character since for many strings
547 krisbash 1.1 * (e.g., URIs) the first character is not unique. Instead the hash
548 * comprises three components:
549 * (1) The length
550 * (3) The last chacter
551 */
552 return n ? (int)(n ^ s[n-1]) : 0;
553 }
554
555 /* Map a URI to a single character namespace identifier */
556 static Char _FindNamespaceID(
557 __inout XML* self,
558 __in_ecount_z(uriSize) const Char* uri,
559 size_t uriSize)
560 {
561 size_t i;
562 unsigned int code = _HashCode(uri, uriSize);
563
564 /* Resolve from client namespace registrations */
565 for (i = 0; i < self->registeredNameSpacesSize; i++)
566 {
567 XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i];
568 krisbash 1.1
569 if (rns->uriCode == code && XML_strcmp(rns->uri, uri) == 0)
570 return rns->id;
571 }
572
573 /* Not found so return null id */
574 return '\0';
575 }
576
577 #define XML_NS "http://www.w3.org/XML/1998/namespace"
578 #define T_XML_NS T("http://www.w3.org/XML/1998/namespace")
579 #define XML_NS_LEN (sizeof(XML_NS) - 1)
580
581 #define XMLNS_NS "http://www.w3.org/2000/xmlns/"
582 #define T_XMLNS_NS T("http://www.w3.org/2000/xmlns/")
583 #define XMLNS_NS_LEN (sizeof(XMLNS_NS) - 1)
584
585 /* Map a prefix to its XML namespace
586 * A non-empty prefix that is unmapped results in an error */
587 static const XML_NameSpace* _FindNamespace(__inout XML* self, __in_z const Char* prefix)
588 {
589 krisbash 1.1 static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
590 static const XML_NameSpace s_xml = {T("xml"), 3 ^ 'l', T_XML_NS, XML_NS_LEN, '\0', 0};
591 static const XML_NameSpace s_xmlns = {T("xmlns"), 5 ^ 's', T_XMLNS_NS, XMLNS_NS_LEN, '\0', 0};
592 unsigned int code = _HashCode(prefix, XML_strlen(prefix));
593 size_t i;
594
595 /* Special case: the 'xml' and 'xmlns' namespaces are fixed. */
596 if (prefix[0] == 'x' &&
597 prefix[1] == 'm' &&
598 prefix[2] == 'l')
599 {
600 if (prefix[3] == '\0')
601 return &s_xml;
602 else if (prefix[3] == 'n' &&
603 prefix[4] == 's' &&
604 prefix[5] == '\0')
605 return &s_xmlns;
606 }
607
608 /* First check single entry cache */
609 if (self->nameSpacesCacheIndex != (size_t)-1)
610 krisbash 1.1 {
611 const XML_NameSpace* ns = &self->nameSpaces[self->nameSpacesCacheIndex];
612
613 if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
614 {
615 return ns;
616 }
617 }
618
619 /* Translate name to the one found in the nameSpaces[] array */
620 for (i = self->nameSpacesSize; i--; )
621 {
622 const XML_NameSpace* ns = &self->nameSpaces[i];
623
624 if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
625 {
626 /* Cache */
627 self->nameSpacesCacheIndex = i;
628
629 return ns;
630 }
631 krisbash 1.1 }
632
633 /* No match
634 * For the empty prefix this means there is no namespace
635 * Otherwise, this is an error */
636 if (prefix[0])
637 {
638 XML_Raise(self, ID_MIUTILS_XMLPARSER_UNDEFINED_NAMESPACE_PREFIX,
639 "undefined namespace prefix: %s", prefix);
640 return NULL;
641 }
642
643 return &s_empty;
644 }
645
646 static void _ParseAttr(
647 __inout XML* self,
648 __inout XML_Elem* elem,
649 __deref_inout_z Char** pInOut)
650 {
651 Char* p = *pInOut;
652 krisbash 1.1 Char* name;
653 Char* nameEnd;
654 Char* value;
655 Char* valueEnd;
656 Char* colon = NULL;
657 Char* tag = p;
658 Char* prefix = T("");
659
660 /* Parse the attribute name */
661 {
662 name = p;
663
664 if (!_IsFirst(*p))
665 {
666 XML_Raise(
667 self,
668 ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_NAME,
669 "expected attribute name");
670 return;
671 }
672
673 krisbash 1.1 p++;
674
675 #if defined(_MSC_VER)
676 # pragma prefast(push)
677 # pragma prefast (disable: 26018)
678 #endif
679 p = _SkipInner(p);
680 #if defined(_MSC_VER)
681 # pragma prefast(pop)
682 #endif
683
684 if (*p == ':')
685 {
686 colon = p++;
687 p = _SkipInner(p);
688 }
689
690 nameEnd = p;
691 }
692
693 /* Seek the quote character (position p beyond quote) */
694 krisbash 1.1 {
695 /* Skip spaces */
696 p = _SkipSpaces(self, p);
697
698 /* Expect a '=' character */
699 if (*p++ != '=')
700 {
701 *nameEnd = '\0';
702 XML_Raise(
703 self,
704 ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_EQUALS,
705 "expected '=' character after '%s'",
706 name);
707 return;
708 }
709 }
710
711 /* Null-terminate name now that we are beyond the '=' */
712 *nameEnd = '\0';
713
714 /* Skip spaces */
715 krisbash 1.1 p = _SkipSpaces(self, p);
716
717 /* Parse the value */
718 {
719 Char quote;
720
721 /* Expect opening quote */
722 if (*p != '"' && *p != '\'')
723 {
724 XML_Raise(
725 self,
726 ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_OPENING_QUOTES,
727 "expected opening quote after %s",
728 name);
729 return;
730 }
731
732 quote = *p++;
733 value = p;
734
735 valueEnd = _ReduceAttrValue(self, &p, quote);
736 krisbash 1.1
737 if (self->status)
738 {
739 /* Propagate error */
740 return;
741 }
742
743 /* Expect closing quote */
744 if (*p++ != quote)
745 {
746 XML_Raise(
747 self,
748 ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_CLOSING_QUOTES,
749 "expected closing quote after '%s'",
750 name);
751 return;
752 }
753
754 /* Null-terminate the value */
755 *valueEnd = '\0';
756 }
757 krisbash 1.1
758 /* Skip spaces */
759 p = _SkipSpaces(self, p);
760
761 /* Separate the prefix and tag */
762 if (colon)
763 {
764 *colon = '\0';
765 tag = colon + 1;
766 prefix = name;
767 }
768 /* else prefix is "" and name is the tag */
769
770 /* If the first/only token is "xmlns", extract namespace */
771 if (name[0] == 'x' &&
772 name[1] == 'm' &&
773 name[2] == 'l' &&
774 name[3] == 'n' &&
775 name[4] == 's' &&
776 name[5] == '\0')
777 {
778 krisbash 1.1 unsigned int tagCode = 0;
779
780 /* The namespace of the xmlns:x attribute is fixed */
781 prefix = T("xmlns");
782
783 if (colon)
784 {
785 /* For non-default namespaces */
786 tagCode = _HashCode(tag, nameEnd - tag);
787 }
788
789 /* Add new namespace entry */
790 {
791 /* Check for stack overflow */
792 if (self->nameSpacesSize == XML_MAX_NAMESPACES)
793 {
794 XML_Raise(
795 self,
796 ID_MIUTILS_XMLPARSER_TOO_MANY_NAMESPACES,
797 "too many namespaces: (>%u)",
798 (int)XML_MAX_NAMESPACES);
799 krisbash 1.1 return;
800 }
801 {
802 XML_NameSpace* newNs = &self->nameSpaces[self->nameSpacesSize++];
803 newNs->name = colon ? tag : T("");
804 newNs->nameCode = tagCode;
805 newNs->id = _FindNamespaceID(self, value, valueEnd - value);
806 newNs->uri = value;
807 newNs->uriSize = valueEnd - value;
808 newNs->depth = self->stackSize;
809 }
810 }
811 }
812
813 /* Append attribute to element */
814 {
815 XML_Attr* attr;
816
817 /* Check for attribute array overflow */
818 if (elem->attrsSize == XML_MAX_ATTRIBUTES)
819 {
820 krisbash 1.1 elem->data[elem->size] = 0;
821
822 XML_Raise(
823 self,
824 ID_MIUTILS_XMLPARSER_TOO_MANY_ATTRIBUTES,
825 "too many attributes '%s': (>%u)",
826 elem->data,
827 (int)XML_MAX_ATTRIBUTES);
828 return;
829 }
830
831 attr = &elem->attrs[elem->attrsSize++];
832 attr->name = tag;
833 attr->nameSize = nameEnd - tag;
834 attr->value = value;
835 attr->valueSize = valueEnd - value;
836
837 /* Save the namespace prefix, which will be translated by the caller */
838 attr->namespaceUri = prefix;
839 }
840
841 krisbash 1.1 *pInOut = p;
842 }
843
844 static void _ParseProcessingInstruction(
845 __inout XML* self,
846 __inout XML_Elem* elem,
847 __in_z Char* p)
848 {
849 /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */
850 Char* start;
851 Char* end;
852
853 /* Advance past '?' character */
854 if (*p)
855 {
856 p++;
857 }
858
859 /* Get tag identifier */
860 {
861 start = p;
862 krisbash 1.1
863 if (*p)
864 {
865 p = _SkipInner(p);
866 }
867
868 if (*p == ':')
869 {
870 p++;
871 if (*p)
872 {
873 p = _SkipInner(p);
874 }
875 }
876
877 /* If input exhuasted */
878 if (*p == '\0')
879 {
880 XML_Raise(
881 self,
882 ID_MIUTILS_XMLPARSER_END_OF_XML_INSTRUCTION,
883 krisbash 1.1 "premature end of input");
884 return;
885 }
886
887 end = p;
888 }
889
890 /* Skip spaces */
891 if (*p)
892 {
893 p = _SkipSpaces(self, p);
894 }
895
896 elem->type = XML_INSTRUCTION;
897 elem->data = start;
898 elem->size = end - start;
899
900 /* Process attributes */
901
902 while (*p && *p != '?')
903 {
904 krisbash 1.1 _ParseAttr(self, elem, &p);
905
906 if (self->status)
907 {
908 /* Propagate error */
909 return;
910 }
911 }
912
913 if (*p)
914 {
915 p++;
916 }
917
918 /* Skip spaces */
919 if (*p)
920 {
921 p = _SkipSpaces(self, p);
922 }
923
924 /* Expect '>' */
925 krisbash 1.1 if (*p++ != '>')
926 {
927 XML_Raise(
928 self,
929 ID_MIUTILS_XMLPARSER_END_OF_INSTRUCTION_MISSING,
930 "expected closing angle bracket");
931 return;
932 }
933
934 /* Return element object */
935 elem->namespaceUri = T("");
936 elem->namespaceUriSize = 0;
937 elem->namespaceId = '\0';
938 *end = '\0';
939
940 self->ptr = p;
941
942 if (self->foundRoot)
943 self->state = STATE_CHARS;
944 else
945 self->state = STATE_START;
946 krisbash 1.1 }
947
948 static void _ParseStartTag(
949 __inout XML* self,
950 __inout XML_Elem* elem,
951 __in_z Char* p)
952 {
953 Char* name;
954 Char* nameEnd;
955 Char* colon = NULL;
956 Char* prefix = T("");
957 const XML_NameSpace* ns;
958 size_t attr;
959
960 /* Found the root */
961 self->foundRoot = 1;
962
963 /* Get tag identifier */
964 {
965 name = p;
966
967 krisbash 1.1 if (!*p || !_IsFirst(*p++))
968 {
969 XML_Raise(
970 self,
971 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED,
972 "expected element name");
973 return;
974 }
975
976 if (*p)
977 {
978 p = _SkipInner(p);
979 }
980
981 if (*p == ':')
982 {
983 colon = p++;
984 if (*p)
985 {
986 p = _SkipInner(p);
987 }
988 krisbash 1.1 }
989
990 /* If input exhuasted */
991 if (*p == '\0')
992 {
993 XML_Raise(
994 self,
995 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END,
996 "premature end of input");
997 return;
998 }
999
1000 nameEnd = p;
1001 }
1002
1003 if (colon)
1004 {
1005 *colon = '\0';
1006 prefix = name;
1007 name = colon + 1;
1008 }
1009 krisbash 1.1
1010 /* Skip spaces */
1011 p = _SkipSpaces(self, p);
1012
1013 elem->type = XML_START;
1014 elem->data = name;
1015 elem->size = nameEnd - name;
1016
1017 /* Process attributes */
1018 while (*p && *p != '/' && *p != '>')
1019 {
1020 _ParseAttr(self, elem, &p);
1021
1022 if (self->status)
1023 return;
1024 }
1025
1026 /* Translate the namespace after parsing xmlns attributes */
1027 ns = _FindNamespace(self, prefix);
1028
1029 if (self->status)
1030 krisbash 1.1 return;
1031
1032 /* Now translate the attribute namespaces */
1033 /* Unprefixed attributes inherit from the element */
1034 for (attr = 0; attr < elem->attrsSize; attr++)
1035 {
1036 XML_Attr* item = elem->attrs + attr;
1037 const XML_NameSpace* itemNS = ns;
1038
1039 if (item->namespaceUri[0] != '\0')
1040 {
1041 /* The namespaceUri field contains the prefix */
1042 itemNS = _FindNamespace(self, item->namespaceUri);
1043
1044 if (self->status)
1045 return;
1046 }
1047
1048 item->namespaceUri = itemNS->uri;
1049 item->namespaceUriSize = itemNS->uriSize;
1050 item->namespaceId = itemNS->id;
1051 krisbash 1.1 }
1052
1053 /* Create the element */
1054 elem->type = XML_START;
1055 elem->data = name;
1056 elem->size = nameEnd - name;
1057 elem->namespaceUri = ns->uri;
1058 elem->namespaceUriSize = ns->uriSize;
1059 elem->namespaceId = ns->id;
1060
1061 /* Check for empty tag */
1062 if (*p == '/')
1063 {
1064 p++;
1065
1066 /* Null-terminate the tag */
1067 *nameEnd = '\0';
1068
1069 /* Inject an empty tag onto element stack */
1070 {
1071 /* Check for stack overflow */
1072 krisbash 1.1 if (self->elemStackSize == XML_MAX_NESTED)
1073 {
1074 XML_Raise(
1075 self,
1076 ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW,
1077 "element stack overflow (>%u)",
1078 (int)XML_MAX_NESTED);
1079 return;
1080 }
1081
1082 self->elemStack[self->elemStackSize] = *elem;
1083 self->elemStack[self->elemStackSize].type = XML_END;
1084 self->elemStackSize++;
1085 self->nesting++;
1086 }
1087
1088 /* Skip space */
1089 if (*p)
1090 {
1091 p = _SkipSpaces(self, p);
1092 }
1093 krisbash 1.1
1094 /* Expect '>' */
1095 if (*p++ != '>')
1096 {
1097 *nameEnd = '\0';
1098 XML_Raise(
1099 self,
1100 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED,
1101 "expected closing angle bracket: %s",
1102 elem->data);
1103 return;
1104 }
1105
1106 self->ptr = p;
1107 self->state = STATE_CHARS;
1108 return;
1109 }
1110
1111 /* Expect '>' */
1112 if (*p++ != '>')
1113 {
1114 krisbash 1.1 *nameEnd = '\0';
1115 XML_Raise(
1116 self,
1117 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED,
1118 "expected closing angle bracket: '%s'",
1119 elem->data);
1120 return;
1121 }
1122
1123 /* Zero-terminate the name tag */
1124 *nameEnd = '\0';
1125
1126 /* Push opening tag */
1127 {
1128 if (self->stackSize == XML_MAX_NESTED)
1129 {
1130 XML_Raise(
1131 self,
1132 ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW,
1133 "element stack overflow (>%u)",
1134 (int)XML_MAX_NESTED);
1135 krisbash 1.1 return;
1136 }
1137
1138 {
1139 XML_Name tmp;
1140 tmp.data = elem->data;
1141 tmp.size = elem->size;
1142 tmp.namespaceUri = elem->namespaceUri;
1143 tmp.namespaceUriSize = elem->namespaceUriSize;
1144 tmp.namespaceId = elem->namespaceId;
1145
1146 self->stack[self->stackSize] = tmp;
1147 self->stackSize++;
1148 self->nesting++;
1149 }
1150 }
1151
1152 self->ptr = p;
1153
1154 if (self->foundRoot)
1155 self->state = STATE_CHARS;
1156 krisbash 1.1 else
1157 self->state = STATE_START;
1158 }
1159
1160 static void _ParseEndTag(
1161 __inout XML* self,
1162 __inout XML_Elem* elem,
1163 __in_z Char* p)
1164 {
1165 /* Closing element: </name> */
1166 Char* name;
1167 Char* nameEnd;
1168 Char* colon = NULL;
1169 Char* prefix = T("");
1170 const XML_NameSpace *ns;
1171
1172 if (*p)
1173 {
1174 p++;
1175 }
1176
1177 krisbash 1.1 /* Skip space */
1178 if (*p)
1179 {
1180 p = _SkipSpaces(self, p);
1181 }
1182
1183 name = p;
1184
1185 /* Skip name */
1186 {
1187 if (!*p || !_IsFirst(*p++))
1188 {
1189 XML_Raise(
1190 self,
1191 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED_ELEM_END,
1192 "expected end tag");
1193 return;
1194 }
1195
1196 if (*p)
1197 {
1198 krisbash 1.1 p = _SkipInner(p);
1199 }
1200
1201 if (*p == ':')
1202 {
1203 colon = p++;
1204 if (*p)
1205 {
1206 p = _SkipInner(p);
1207 }
1208 }
1209 }
1210
1211 /* If input exhuasted */
1212 if (*p == '\0')
1213 {
1214 XML_Raise(
1215 self,
1216 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END_ELEM_END,
1217 "premature end of input");
1218 return;
1219 krisbash 1.1 }
1220
1221 nameEnd = p;
1222
1223 if (colon)
1224 {
1225 *colon = '\0';
1226 prefix = name;
1227 name = colon + 1;
1228 }
1229
1230 /* Skip spaces */
1231 p = _SkipSpaces(self, p);
1232
1233 /* Expect '>' */
1234 if (*p++ != '>')
1235 {
1236 XML_Raise(
1237 self,
1238 ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED_ELEM_END,
1239 "expected closing angle bracket");
1240 krisbash 1.1 return;
1241 }
1242
1243 /* Null terminate name */
1244 *nameEnd = '\0';
1245
1246 ns = _FindNamespace(self, prefix);
1247
1248 if (self->status)
1249 return;
1250
1251 /* Return element object */
1252 elem->type = XML_END;
1253 elem->data = name;
1254 elem->size = nameEnd - name;
1255 elem->namespaceUri = ns->uri;
1256 elem->namespaceUriSize = ns->uriSize;
1257 elem->namespaceId = ns->id;
1258
1259 /* Match opening name */
1260 {
1261 krisbash 1.1 /* Check for stack underflow */
1262 if (self->stackSize-- == 0)
1263 {
1264 XML_Raise(
1265 self,
1266 ID_MIUTILS_XMLPARSER_ELEMENT_TOO_MANY_ENDS,
1267 "too many closing tags: %s",
1268 name);
1269 return;
1270 }
1271
1272 self->nesting--;
1273
1274 /* Check that closing name matches opening name */
1275 {
1276 XML_Name* xn = &self->stack[self->stackSize];
1277
1278 if (XML_strcmp(xn->data, name) != 0 ||
1279 xn->namespaceId != ns->id ||
1280 (ns->id == 0 && XML_strcmp(xn->namespaceUri, ns->uri) != 0))
1281 {
1282 krisbash 1.1 XML_Raise(
1283 self,
1284 ID_MIUTILS_XMLPARSER_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG,
1285 "open/close tag mismatch: %s/%s",
1286 self->stack[self->stackSize].data,
1287 name);
1288 return;
1289 }
1290 }
1291 }
1292
1293 /* Remove namespaces that have just gone out of scope */
1294 {
1295 size_t i;
1296 size_t n = 0;
1297
1298 for (i = self->nameSpacesSize; i--; )
1299 {
1300 if (self->nameSpaces[i].depth >= self->stackSize)
1301 n++;
1302 }
1303 krisbash 1.1
1304 if (n)
1305 {
1306 self->nameSpacesSize -= n;
1307
1308 /* Clear single-entry cache */
1309 if (self->nameSpacesCacheIndex >= self->nameSpacesSize)
1310 self->nameSpacesCacheIndex = (size_t)-1;
1311 }
1312 }
1313
1314 /* Set next state */
1315 self->ptr = p;
1316 self->state = STATE_CHARS;
1317 }
1318
1319 static void _ParseComment(
1320 __inout XML* self,
1321 __inout XML_Elem* elem,
1322 __in_z Char* p)
1323 {
1324 krisbash 1.1 /* Comment: <!-- blah blah blah --> */
1325 Char* start;
1326 Char* end;
1327
1328 if (!p[0] || !p[1])
1329 {
1330 XML_Raise(
1331 self,
1332 ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END,
1333 "premature end of comment");
1334 return;
1335 }
1336 p += 2;
1337 start = p;
1338
1339 while (*p)
1340 {
1341 if (p[0] == '-' && p[1] == '-')
1342 {
1343 if (p[2] != '>')
1344 {
1345 krisbash 1.1 XML_Raise(
1346 self,
1347 ID_MIUTILS_XMLPARSER_COMMENT_END_EXPECTED,
1348 "expected closing comment");
1349 return;
1350 }
1351
1352 /* Null-terminate this comment */
1353 end = p;
1354 p += 3;
1355
1356 /* Prepare element */
1357 elem->type = XML_COMMENT;
1358 elem->data = start;
1359 elem->size = end - start;
1360 elem->namespaceUri = T("");
1361 elem->namespaceUriSize = 0;
1362 elem->namespaceId = '\0';
1363 *end = '\0';
1364
1365 /* Set next state */
1366 krisbash 1.1 self->ptr = p;
1367
1368 if (self->foundRoot)
1369 self->state = STATE_CHARS;
1370 else
1371 self->state = STATE_START;
1372
1373 return;
1374 }
1375 else if (p[0] == '\n')
1376 self->line++;
1377
1378 p++;
1379 }
1380
1381 XML_Raise(
1382 self,
1383 ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END,
1384 "malformed comment");
1385 }
1386
1387 krisbash 1.1 static void _ParseCDATA(
1388 __inout XML* self,
1389 __inout XML_Elem* elem,
1390 __in_z Char* p)
1391 {
1392 /* <![CDATA[...]]> */
1393 Char* start;
1394 Char* end;
1395
1396 /* Recognize <!DOCTYPE ...> */
1397 if (!p[0] || !p[1] || !p[2] || !p[3] || !p[4] || !p[5] || !p[6])
1398 {
1399 XML_Raise(
1400 self,
1401 ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END,
1402 "premature end of CDATA");
1403 return;
1404 }
1405 p += 7;
1406 start = p;
1407
1408 krisbash 1.1 while (*p)
1409 {
1410 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
1411 {
1412 end = p;
1413 p += 3;
1414
1415 /* Prepare element */
1416 elem->type = XML_CHARS;
1417 elem->data = start;
1418 elem->size = end - start;
1419 elem->namespaceUri = T("");
1420 elem->namespaceUriSize = 0;
1421 elem->namespaceId = '\0';
1422 *end = '\0';
1423
1424 /* Set next state */
1425 self->ptr = p;
1426 self->state = STATE_CHARS;
1427
1428 return;
1429 krisbash 1.1
1430 }
1431 else if (p[0] == '\n')
1432 self->line++;
1433
1434 p++;
1435 }
1436
1437 XML_Raise(
1438 self,
1439 ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END,
1440 "unterminated CDATA section");
1441 return;
1442 }
1443
1444 static void _ParseDOCTYPE(
1445 __inout XML* self,
1446 __inout XML_Elem* elem,
1447 __in_z Char* p)
1448 {
1449 MI_UNUSED(elem);
1450 krisbash 1.1
1451 /* Recognize <!DOCTYPE ...> */
1452 if (!p[0] || !p[1] || !p[2] || !p[3] || !p[4] || !p[5] || !p[6])
1453 {
1454 XML_Raise(self,
1455 ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END,
1456 "premature end of DOCTYPE");
1457 return;
1458 }
1459 p += 7;
1460
1461 while (*p != '\0' && *p != '>')
1462 {
1463 if (p[0] == '\n')
1464 self->line++;
1465
1466 p++;
1467 }
1468
1469 if (*p++ != '>')
1470 {
1471 krisbash 1.1 XML_Raise(
1472 self,
1473 ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END,
1474 "premature end of DOCTYPE");
1475 return;
1476 }
1477
1478 /* Set next state */
1479 self->ptr = p;
1480
1481 if (self->foundRoot)
1482 self->state = STATE_CHARS;
1483 else
1484 self->state = STATE_START;
1485 }
1486
1487 static int _ParseCharData(
1488 __inout XML* self,
1489 __inout XML_Elem* elem,
1490 __in_z Char* p)
1491 {
1492 krisbash 1.1 Char* start;
1493 Char* end;
1494
1495 #if 0
1496 pca: Need function to strip leading and trailing white space on a needed basis
1497 /* Skip leading spaces */
1498 p = _SkipSpaces(self, p);
1499 #endif
1500 /* Reject input if it does appear inside tags */
1501 if (self->stackSize == 0)
1502 {
1503 /* Finished parsing document */
1504 self->status = 1;
1505 self->ptr = p;
1506 return 0;
1507 }
1508
1509 #if 0
1510 pca: Need function to strip leading and trailing white space on a needed basis
1511 /* Remove leading spaces */
1512 p = _SkipSpaces(self, p);
1513 krisbash 1.1 #endif
1514
1515 if (*p == '<')
1516 {
1517 self->ptr = p + 1;
1518 self->state = STATE_TAG;
1519 return 0;
1520 }
1521
1522 /* Save pointer to start of data */
1523 start = p;
1524
1525 /* reduce character data */
1526 end = _ReduceCharData(self, &p);
1527
1528 if (self->status)
1529 {
1530 /* Propagate error */
1531 return 0;
1532 }
1533
1534 krisbash 1.1 /* Process character data */
1535 if (*p != '<')
1536 {
1537 XML_Raise(
1538 self,
1539 ID_MIUTILS_XMLPARSER_CHARDATA_EXPECTED_ELEMENT_END_TAG,
1540 "expcted opening angle bracket");
1541 return 0;
1542 }
1543
1544 #if 0
1545 pca: Need function to strip leading and trailing white space on a needed basis
1546 /* Remove trailing spaces (the newlines have already been counted) */
1547 {
1548 /* Remove trainling spaces from the character data */
1549 start[-1] = '\0';
1550
1551 while (_IsSpace(end[-1]))
1552 end--;
1553 }
1554 #endif
1555 krisbash 1.1
1556 /* Set next state */
1557 self->ptr = p + 1;
1558 self->state = STATE_TAG;
1559
1560 /* Return character data element if non-empty */
1561 if (end == start)
1562 return 0;
1563
1564 /* Prepare element */
1565 *end = '\0';
1566 elem->type = XML_CHARS;
1567 elem->data = start;
1568 elem->size = end - start;
1569 elem->namespaceUri = T("");
1570 elem->namespaceUriSize = 0;
1571 elem->namespaceId = '\0';
1572
1573 #if 1
1574 XML_StripWhitespace(elem);
1575 #endif
1576 krisbash 1.1
1577 /* Return 1 to indicate non-empty element */
1578 return 1;
1579 }
1580
1581 /*
1582 **==============================================================================
1583 **
1584 ** Public definitions
1585 **
1586 **==============================================================================
1587 */
1588
1589 const Char* XML_Elem_GetAttr(
1590 __inout XML_Elem* self,
1591 __in_z const Char* name)
1592 {
1593 size_t i;
1594
1595 for (i = 0; i < self->attrsSize; i++)
1596 {
1597 krisbash 1.1 if (XML_strcmp(name, self->attrs[i].name) == 0)
1598 return self->attrs[i].value;
1599 }
1600
1601 /* Not found! */
1602 return NULL;
1603 }
1604
1605 void XML_Init(
1606 __out XML* self)
1607 {
1608 memset(self, 0, sizeof(XML));
1609
1610 self->nameSpacesCacheIndex = (size_t)-1;
1611 }
1612
1613 void XML_SetText(
1614 __inout XML* self,
1615 __in_z Char* text)
1616 {
1617 self->text = text;
1618 krisbash 1.1 self->ptr = text;
1619 self->line = 1;
1620 self->state = STATE_START;
1621 }
1622
1623 int XML_Next(
1624 __inout XML* self,
1625 __out XML_Elem* elem)
1626 {
1627 if (self->elemStackSize)
1628 {
1629 *elem = self->elemStack[--self->elemStackSize];
1630 self->nesting--;
1631 return 0;
1632 }
1633
1634 elem->attrsSize = 0;
1635
1636 for (;;)
1637 {
1638 switch (self->state)
1639 krisbash 1.1 {
1640 case STATE_START:
1641 {
1642 Char* p = self->ptr;
1643
1644 /* Skip spaces */
1645 p = _SkipSpaces(self, p);
1646
1647 /* Expect '<' */
1648 if (*p != '<')
1649 {
1650 XML_Raise(
1651 self,
1652 ID_MIUTILS_XMLPARSER_OPEN_ANGLE_BRACKET_EXPECTED,
1653 "expected open angle bracket");
1654 return -1;
1655 }
1656
1657 self->ptr = p + 1;
1658 self->state = STATE_TAG;
1659 break;
1660 krisbash 1.1 }
1661 case STATE_TAG:
1662 {
1663 Char* p = self->ptr;
1664
1665 /* Skip spaces */
1666 p = _SkipSpaces(self, p);
1667
1668 /* Expect one of these */
1669 if (*p == '/')
1670 {
1671 _ParseEndTag(self, elem, p);
1672 return self->status;
1673 }
1674 else if (_IsFirst(*p))
1675 {
1676 _ParseStartTag(self, elem, p);
1677 return self->status;
1678 }
1679 else if (*p == '?')
1680 {
1681 krisbash 1.1 _ParseProcessingInstruction(self, elem, p);
1682 return self->status;
1683 }
1684 else if (*p == '!')
1685 {
1686 p++;
1687
1688 if (p[0] == '-' && p[1] == '-')
1689 {
1690 _ParseComment(self, elem, p);
1691 return self->status;
1692 }
1693 else if (memcmp(p, T("[CDATA["), 7 * sizeof(Char)) == 0)
1694 {
1695 _ParseCDATA(self, elem, p);
1696 return self->status;
1697 }
1698 else if (memcmp(p, T("DOCTYPE"), 7 * sizeof(Char)) == 0)
1699 {
1700 _ParseDOCTYPE(self, elem, p);
1701
1702 krisbash 1.1 if (self->status)
1703 return -1;
1704
1705 break;
1706 }
1707 else
1708 {
1709 XML_Raise(
1710 self,
1711 ID_MIUTILS_XMLPARSER_COMMENT_CDATA_DOCTYPE_EXPECTED,
1712 "expected comment, CDATA, or DOCTYPE");
1713 return -1;
1714 }
1715 }
1716 else
1717 {
1718 XML_Raise(
1719 self,
1720 ID_MIUTILS_XMLPARSER_ELEMENT_EXPECTED,
1721 "expected element");
1722 return-1;
1723 krisbash 1.1 }
1724 break;
1725 }
1726 case STATE_CHARS:
1727 {
1728 Char* p = self->ptr;
1729
1730 if (_ParseCharData(self, elem, p) == 1)
1731 {
1732 /* Return character data to caller */
1733 return 0;
1734 }
1735
1736 if (self->status)
1737 return self->status;
1738
1739 /* empty character data */
1740 break;
1741 }
1742 default:
1743 {
1744 krisbash 1.1 XML_Raise(
1745 self,
1746 ID_MIUTILS_XMLPARSER_UNEXPECTED_STATE,
1747 "unexpected state");
1748 return -1;
1749 }
1750 }
1751 }
1752
1753 //return 0;
1754 }
1755
1756 int XML_Expect(
1757 __inout XML* self,
1758 __out XML_Elem* elem,
1759 XML_Type type,
1760 Char knownNamespaceID, /* 0 means not well known */
1761 __in_z_opt const Char * namespaceURI, /* NULL if expectedNamespace is not 0, otherwise real URL */
1762 __in_z const Char* name)
1763 {
1764 if (XML_Next(self, elem) == 0 &&
1765 krisbash 1.1 elem->type == type &&
1766 (!name || XML_strcmp(elem->data, name) == 0))
1767 {
1768 if (!name)
1769 {
1770 return 0; /* No name means no namespace! */
1771 }
1772 if (knownNamespaceID)
1773 {
1774 if ( knownNamespaceID == elem->namespaceId)
1775 {
1776 return 0; /* Known namespace IDs the same */
1777 }
1778 XML_Raise(
1779 self,
1780 ID_MIUTILS_UNKNOWN,
1781 T("Namespace URI IDs do not match: <%c>: %c"),
1782 knownNamespaceID,
1783 elem->namespaceId);
1784 return -1;
1785 }
1786 krisbash 1.1 else
1787 {
1788 if (namespaceURI && elem->namespaceUri &&
1789 (XML_strcmp(namespaceURI, elem->namespaceUri)== 0))
1790 {
1791 /* Non-standard namespace URIs match */
1792 return 0;
1793 }
1794 XML_Raise(self,
1795 ID_MIUTILS_UNKNOWN,
1796 T("Namespace URIs do not match: <%s>: %s"),
1797 namespaceURI,
1798 elem->namespaceUri);
1799 return -1;
1800 }
1801 }
1802
1803 if (type == XML_START)
1804 XML_Raise(
1805 self,
1806 ID_MIUTILS_UNKNOWN,
1807 krisbash 1.1 T("expected element: <%s>: %s"),
1808 name,
1809 elem->data);
1810 else if (type == XML_END)
1811 {
1812 XML_Raise(
1813 self,
1814 ID_MIUTILS_UNKNOWN,
1815 T("expected element: </%s>: %s"),
1816 name,
1817 elem->data);
1818 }
1819 else if (type == XML_CHARS)
1820 {
1821 XML_Raise(
1822 self,
1823 ID_MIUTILS_UNKNOWN,
1824 T("expected character data"));
1825 }
1826
1827 return -1;
1828 krisbash 1.1 }
1829
1830 int XML_Skip(
1831 __inout XML* self)
1832 {
1833 XML_Elem tmp;
1834 size_t nesting = self->nesting;
1835
1836 while (self->nesting >= nesting)
1837 {
1838 if (XML_Next(self, &tmp) != 0)
1839 return -1;
1840 }
1841
1842 return 0;
1843 }
1844
1845 int XML_RegisterNameSpace(
1846 __inout XML* self,
1847 Char id,
1848 __in_z const Char* uri)
1849 krisbash 1.1 {
1850 XML_RegisteredNameSpace rns;
1851 /* ATTN: we do not check for duplicates */
1852
1853 /* Reject out of range ids */
1854 if (id < 'a' || id > 'z')
1855 return -1;
1856
1857 /* Check for overflow of the array */
1858 if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES)
1859 return -1;
1860
1861 rns.id = id;
1862 rns.uri = uri;
1863 rns.uriCode = _HashCode(uri, XML_strlen(uri));
1864
1865 self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns;
1866
1867 return 0;
1868 }
1869
1870 krisbash 1.1 int XML_PutBack(
1871 __inout XML* self,
1872 __in const XML_Elem* elem)
1873 {
1874 /* Check for stack overflow */
1875 if (self->elemStackSize == XML_MAX_NESTED)
1876 {
1877 XML_Raise(
1878 self,
1879 ID_MIUTILS_UNKNOWN,
1880 T("element stack overflow (>%u)"),
1881 XML_MAX_NESTED);
1882 return -1;
1883 }
1884
1885 self->elemStack[self->elemStackSize++] = *elem;
1886 return 0;
1887 }
1888
1889 void XML_PutError(__inout XML* self)
1890 {
1891 krisbash 1.1 if (self->status == -1)
1892 {
1893 XML_fprintf(stderr, T("line %u: %s\n"), (int)self->line, self->message);
1894 }
1895 }
1896
1897 void XML_Raise(
1898 XML* self,
1899 unsigned formatStringId,
1900 const Char* format,
1901 ...)
1902 {
1903 #if defined(CONFIG_ENABLE_WCHAR)
1904 # error "implement this!"
1905 #else
1906 int n;
1907 va_list ap;
1908 memset(&ap, 0, sizeof(ap));
1909
1910 self->status = -1;
1911 self->message[0] = '\0';
1912 krisbash 1.1
1913 va_start(ap, format);
1914 n = XML_vsnprintf(self->message, sizeof(self->message), format, ap);
1915 va_end(ap);
1916 #endif
1917 }
1918
1919 void XML_FormatError(
1920 __inout XML* self,
1921 __out_ecount_z(size) Char* format,
1922 size_t size)
1923 {
1924 *format = '\0';
1925
1926 if (self->status == -1)
1927 {
1928 XML_snprintf(
1929 format,
1930 size,
1931 T("%u: error: %s"),
1932 (unsigned int)self->line,
1933 krisbash 1.1 self->message);
1934 }
1935 }
1936
1937 int XML_StripWhitespace(
1938 __inout XML_Elem* elem)
1939 {
1940 if (elem->type != XML_CHARS)
1941 {
1942 return 0;
1943 }
1944
1945 //Strip leading white space
1946 while (elem->size && _IsSpace(*elem->data))
1947 {
1948 elem->data++;
1949 elem->size--;
1950 }
1951 //Strip trailing white space
1952 while(elem->size && _IsSpace(elem->data[elem->size-1]))
1953 {
1954 krisbash 1.1 elem->data[elem->size-1] = T('\0');
1955 elem->size--;
1956 }
1957 return 0;
1958 }
|