1 mike 1.1 /*
2 **==============================================================================
3 **
4 ** Open Management Infrastructure (OMI)
5 **
6 ** Copyright (c) Microsoft Corporation
7 **
8 ** Licensed under the Apache License, Version 2.0 (the "License"); you may not
9 ** use this file except in compliance with the License. You may obtain a copy
10 ** of the License at
11 **
12 ** http://www.apache.org/licenses/LICENSE-2.0
13 **
14 ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 ** MERCHANTABLITY OR NON-INFRINGEMENT.
18 **
19 ** See the Apache 2 License for the specific language governing permissions
20 ** and limitations under the License.
21 **
22 mike 1.1 **==============================================================================
23 */
24
25 #include <common.h>
26 #include "xml.h"
27 #include <string.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdarg.h>
31 #include <ctype.h>
|
32 krisbash 1.4 #include <pal/format.h>
|
33 mike 1.1
#if defined(_MSC_VER)
/* PreFast - reviewed and believed to be false-positive */

/* warning C6385: Invalid data: accessing '??', the readable size is 'x' bytes, but 'y' bytes might be read: Lines: ... */
# pragma warning(disable : 6385)
/* warning C6386: Buffer overrun: accessing 'self->registeredNameSpaces' ... */
# pragma warning(disable : 6386)

#endif /* _MSC_VER */

/*
 * Character-width abstraction used throughout this file.
 *
 *   T(STR)       - spells a string/character literal of the configured width
 *   XML_strtoul  - string-to-unsigned-long conversion for that width
 *   XML_strcmp   - string comparison for that width
 *   XML_strlen   - string length for that width
 *
 * When CONFIG_ENABLE_WCHAR is defined the wide-character C library routines
 * are selected; otherwise the narrow (char) routines are used.
 */
#if defined(CONFIG_ENABLE_WCHAR)
# define T(STR) L##STR
# define XML_strtoul wcstoul
# define XML_strcmp wcscmp
# define XML_strlen wcslen
#else
# define T(STR) STR
# define XML_strtoul strtoul
# define XML_strcmp strcmp
# define XML_strlen strlen
#endif
56
|
57 mike 1.1 /*
58 **==============================================================================
59 **
60 ** Local definitions
61 **
62 **==============================================================================
63 */
64
/*
 * Values assigned to the parser's 'state' field by the tag parsers below.
 *
 *   STATE_START - selected after a prolog item (instruction, comment,
 *                 DOCTYPE) while no root element has been found yet
 *   STATE_TAG   - not assigned anywhere in the code visible here;
 *                 NOTE(review): presumably set by the main parse loop
 *   STATE_CHARS - selected after a tag once the root element has been found
 */
typedef enum _XML_State
{
    STATE_START = 0,
    STATE_TAG = 1,
    STATE_CHARS = 2
}
XML_State;
72
/* Whitespace classification table, indexed by character code.
 *
 *   '\n' (0x0A) => 1   (bit 0 set: callers use it to count lines)
 *   '\t' (0x09) => 2
 *   '\r' (0x0D) => 2
 *   ' '  (0x20) => 2
 *   everything else => 0
 */
static const unsigned char _spaceChar[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
90
|
91 krisbash 1.4 INLINE int _IsSpace(XML_Char c)
|
92 mike 1.1 {
|
93 krisbash 1.4 #if defined(CONFIG_ENABLE_WCHAR)
94 if (c >= 0 && c < 256)
95 return _spaceChar[(unsigned char)c];
96 else
97 return 0;
98 #else
99 return _spaceChar[(unsigned char)c];
100 #endif
|
101 mike 1.1 }
102
/* XML name character classification table, indexed by character code.
 *
 *   [A-Za-z_]  => 2  (may start a name; also valid inside a name)
 *   [0-9.-]    => 1  (valid only after the first character)
 *   all others => 0
 *
 * Note that ':' is 0 here: the tag and attribute parsers test for the
 * prefix separator explicitly after scanning the inner characters.
 */
static const unsigned char _nameChar[256] =
{
    /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
    /* 0x40 */ 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0x50 */ 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,2,
    /* 0x60 */ 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0x70 */ 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
    /* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
118
|
119 krisbash 1.4 /*
120 Skips N characters while checking that end of the string has not been hit
121 */
122 INLINE XML_Char * _SkipChars(_In_z_ XML_Char* p, size_t count)
|
123 mike 1.1 {
|
124 krisbash 1.4 size_t i;
125 for (i = 0; i < count; ++i)
126 {
127 if (*p == 0)
128 {
129 return NULL;
130 }
131
132 p++;
133 }
134
135 return p;
|
136 mike 1.1 }
137
|
138 krisbash 1.4 INLINE int _IsFirst(XML_Char c)
|
139 mike 1.1 {
|
140 krisbash 1.4 #if defined(CONFIG_ENABLE_WCHAR)
141 if (c >= 0 && c < 256)
142 return _nameChar[(unsigned char)c] & 2;
143 else
144 return 0;
145 #else
146 return _nameChar[(unsigned char)c] & 2;
147 #endif
148 }
149
150 INLINE int _IsInner(XML_Char c)
151 {
152 #if defined(CONFIG_ENABLE_WCHAR)
153 if (c >= 0 && c < 256)
154 return _nameChar[(unsigned char)c];
155 else
156 return 0;
157 #else
|
158 mike 1.1 return _nameChar[(unsigned char)c];
|
159 krisbash 1.4 #endif
|
160 mike 1.1 }
161
|
162 krisbash 1.4 INLINE XML_Char* _SkipInner(_In_z_ XML_Char* p)
|
163 mike 1.1 {
|
164 krisbash 1.4 while (*p && _IsInner(*p))
|
165 mike 1.1 p++;
166
167 return p;
168 }
169
|
170 krisbash 1.4 static XML_Char* _SkipSpacesAux(_Inout_ XML* self, _In_z_ XML_Char* p)
|
171 mike 1.1 {
|
172 krisbash 1.4 XML_UChar x;
|
173 mike 1.1 size_t n = 0;
174
|
175 krisbash 1.4 while (*p && (x = (XML_UChar)_IsSpace(*p)) != 0)
|
176 mike 1.1 {
177 n += 0x01 & x;
178 p++;
179 }
180
181 self->line += n;
182 return p;
183 }
184
|
185 krisbash 1.4 INLINE XML_Char* _SkipSpaces(_Inout_ XML* self, _In_z_ XML_Char* p)
|
186 mike 1.1 {
|
187 krisbash 1.4 if (!p[0] || !_IsSpace(p[0]))
|
188 mike 1.1 return p;
189
190 if (p[0] == '\n')
191 self->line++;
192
|
193 krisbash 1.4 if (!p[1] || !_IsSpace(p[1]))
|
194 mike 1.1 return &p[1];
195
196 if (p[1] == '\n')
197 self->line++;
198
|
199 krisbash 1.4 if (!p[2] || !_IsSpace(p[2]))
|
200 mike 1.1 return &p[2];
201
202 if (p[2] == '\n')
203 self->line++;
204
205 return _SkipSpacesAux(self, &p[3]);
206 }
207
|
208 krisbash 1.4 INLINE XML_Char* _ToEntityRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
|
209 mike 1.1 {
210 /* Note: we collected the following statistics on the frequency of
211 * each entity reference in a large body of XML documents:
212 *
213 * " - 74,480 occurences
214 * ' - 13,877 occurences
215 * < - 9,919 occurences
216 * > - 9,853 occurences
217 * & - 111 occurences
218 *
219 * The cases below are organized in order of statistical frequency.
220 */
221
222 /* Match one of these: "lt;", "gt;", "amp;", "quot;", "apos;" */
223
224 if (p[0] == 'q' && p[1] == 'u' && p[2] == 'o' && p[3] == 't' && p[4] == ';')
225 {
226 *ch = '"';
227 return p + 5;
228 }
229
230 mike 1.1 if (p[0] == 'a' && p[1] == 'p' && p[2] == 'o' && p[3] == 's' && p[4] == ';')
231 {
232 *ch = '\'';
233 return p + 5;
234 }
235
236 if (p[0] == 'l' && p[1] == 't' && p[2] == ';')
237 {
238 *ch = '<';
239 return p + 3;
240 }
241
242 if (p[0] == 'g' && p[1] == 't' && p[2] == ';')
243 {
244 *ch = '>';
245 return p + 3;
246 }
247
248 if (p[0] == 'a' && p[1] == 'm' && p[2] == 'p' && p[3] == ';')
249 {
250 *ch = '&';
251 mike 1.1 return p + 4;
252 }
253
|
254 krisbash 1.4 *ch = 0;
255 XML_Raise(self, XML_ERROR_BAD_ENTITY_REFERENCE);
|
256 mike 1.1 return p;
257 }
258
|
259 krisbash 1.4 INLINE XML_Char* _ToCharRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
|
260 mike 1.1 {
|
261 krisbash 1.4 XML_Char* end = NULL;
|
262 mike 1.1 unsigned long x;
263
264 if (*p == 'x')
265 {
266 p++;
|
267 krisbash 1.4 x = XML_strtoul(p, &end, 16);
|
268 mike 1.1 }
269 else
270 {
|
271 krisbash 1.4 x = XML_strtoul(p, &end, 10);
|
272 mike 1.1 }
273
274 if (end == p || *end != ';' || x > 255)
275 {
276 *ch = '\0';
|
277 krisbash 1.4 XML_Raise(self, XML_ERROR_BAD_CHARACTER_REFERENCE);
|
278 mike 1.1 return p;
279 }
280
|
281 krisbash 1.4 *ch = (XML_Char)x;
|
282 mike 1.1
283 return end + 1;
284 }
285
|
286 krisbash 1.4 INLINE XML_Char* _ToRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
|
287 mike 1.1 {
288 /* Examples:
289 * @
290 * &xFF;
291 * &
292 * <
293 */
294 if (*p == '#')
295 return _ToCharRef(self, p + 1, ch);
296 else
297 return _ToEntityRef(self, p, ch);
298 }
299
|
/* Attribute-value fast-path table, indexed by character code.
 * Every character yields 1 except these five, which yield 0:
 *     '\0' (0x00), '\n' (0x0A), '"' (0x22), '&' (0x26), '\'' (0x27)
 */
static const unsigned char _ReduceAttrValueMatchChars[256] =
{
    /* 0x00 */ 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
    /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x20 */ 1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xA0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xB0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xC0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xD0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xF0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
};
314
315 INLINE int _ReduceAttrValueMatch(XML_Char c)
316 {
317 #if defined(CONFIG_ENABLE_WCHAR)
318 if (c >= 0 && c < 256)
319 return _ReduceAttrValueMatchChars[(unsigned char)c];
320 else
321 krisbash 1.4 return 1;
322 #else
323 return _ReduceAttrValueMatchChars[(unsigned char)c];
324 #endif
325 }
326
|
327 mike 1.1 /* Reduce entity references and remove leading and trailing whitespace */
|
328 krisbash 1.4 static XML_Char* _ReduceAttrValue(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut, XML_Char eos)
|
329 mike 1.1 {
|
330 krisbash 1.4 _Null_terminated_ XML_Char* p = *pInOut;
331 _Null_terminated_ XML_Char* end;
|
332 mike 1.1 size_t n = 0;
333
|
334 krisbash 1.4 if (!p)
335 return NULL;
336
337 while (*p)
|
338 mike 1.1 {
|
339 krisbash 1.4 if (_ReduceAttrValueMatch(*p))
340 {
|
341 mike 1.1 p++;
|
342 krisbash 1.4 }
343 else
344 {
345 if (*p == '\0' || *p != '\n')
346 break;
|
347 mike 1.1
|
348 krisbash 1.4 self->line++;
349 p++;
350 }
|
351 mike 1.1 }
352
353 end = p;
354
355 while (*p && *p != eos)
356 {
357 if (*p == '&')
358 {
|
359 krisbash 1.4 XML_Char c = '\0';
360 _Null_terminated_ XML_Char* tmp;
|
361 mike 1.1
362 p++;
363 tmp = _ToRef(self, p, &c);
364
365 if (self->status)
366 {
367 /* Propagate error */
368 return NULL;
369 }
370
371 *end++ = c;
372 p = tmp;
373 }
374 else
375 {
376 if (*p == '\n')
377 n++;
378
379 *end++ = *p++;
380 }
381 }
382 mike 1.1
383 *pInOut = p;
384 self->line += n;
385
386 return end;
387 }
388
|
/* Character-data fast-path table, indexed by character code.
 * Every character yields 1 except these four, which yield 0:
 *     '\0' (0x00), '\n' (0x0A), '&' (0x26), '<' (0x3C)
 */
static const unsigned char _ReduceCharDataMatchChars[256] =
{
    /* 0x00 */ 0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
    /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x20 */ 1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,
    /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
    /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xA0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xB0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xC0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xD0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xF0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
};
401
402 INLINE int _ReduceCharDataMatch(XML_Char c)
403 {
404 #if defined(CONFIG_ENABLE_WCHAR)
405 if (c >= 0 && c < 256)
406 return _ReduceCharDataMatchChars[(unsigned char)c];
407 else
408 return 1;
409 #else
410 krisbash 1.4 return _ReduceCharDataMatchChars[(unsigned char)c];
411 #endif
412 }
413
|
414 mike 1.1 /* Reduce character data, advance p, and return pointer to end */
|
415 krisbash 1.4 static XML_Char* _ReduceCharData(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut)
|
416 mike 1.1 {
|
417 krisbash 1.4 XML_Char* p = *pInOut;
418 XML_Char* end;
|
419 mike 1.1 size_t n = 0;
420
|
421 krisbash 1.4 if (!p)
422 return NULL;
423
424 while (*p)
|
425 mike 1.1 {
|
426 krisbash 1.4 if (_ReduceCharDataMatch(*p))
427 {
|
428 mike 1.1 p++;
|
429 krisbash 1.4 }
430 else
431 {
432 if (!*p || *p != '\n')
433 break;
|
434 mike 1.1
|
435 krisbash 1.4 self->line++;
436 p++;
437 }
|
438 mike 1.1 }
439
440 end = p;
441
442 /* Can we return now? */
443 if (*p == '<')
444 {
445 *pInOut = p;
446 self->line += n;
447 return end;
448 }
449
450 /* Seek next tag start */
|
451 krisbash 1.4 #ifdef _PREFAST_
452 #pragma prefast (push)
453 #pragma prefast (disable: 26018)
454 /* OACR does not like loops inside loops that modify buffer pointers, the logic however is correct */
455 #endif
|
456 mike 1.1 while (*p && *p != '<')
|
457 krisbash 1.4 #ifdef _PREFAST_
458 #pragma prefast (pop)
459 #endif
|
460 mike 1.1 {
461 if (*p == '&')
462 {
|
463 krisbash 1.4 XML_Char c = '\0';
464 XML_Char* tmp;
|
465 mike 1.1
466 p++;
467 tmp = _ToRef(self, p, &c);
468
469 if (tmp == p)
470 return NULL;
471
472 *end++ = c;
473 p = tmp;
474 }
475 else
476 {
477 for (;;)
478 {
|
479 krisbash 1.4 while ((_ReduceCharDataMatch(*p)))
|
480 mike 1.1 *end++ = *p++;
481
482 if (*p != '\n')
483 break;
484
485 *end++ = *p++;
486 self->line++;
487 }
488 }
489 }
490
491 /* Document cannot end with character data */
492 if (*p == '\0')
493 return NULL;
494
495 *pInOut = p;
496 self->line += n;
497
498 return end;
499 }
500
|
501 krisbash 1.4 /* Calculate a fast hash code for a strings */
502 INLINE unsigned int _HashCode(_In_reads_z_(n) const XML_Char* s, size_t n)
|
503 mike 1.1 {
504 /* This hash algorithm excludes the first character since for many strings
505 * (e.g., URIs) the first character is not unique. Instead the hash
506 * comprises three components:
507 * (1) The length
508 * (3) The last chacter
509 */
|
510 krisbash 1.4 return n ? (int)(n ^ s[n-1]) : 0;
|
511 mike 1.1 }
512
513 /* Map a URI to a single character namespace identifier */
|
514 krisbash 1.4 static XML_Char _FindNamespaceID(
515 _Inout_ XML* self,
516 _In_reads_z_(uriSize) const XML_Char* uri,
|
517 mike 1.1 size_t uriSize)
518 {
519 size_t i;
520 unsigned int code = _HashCode(uri, uriSize);
521
522 /* Resolve from client namespace registrations */
523 for (i = 0; i < self->registeredNameSpacesSize; i++)
524 {
525 XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i];
526
|
527 krisbash 1.4 if (rns->uriCode == code && XML_strcmp(rns->uri, uri) == 0)
|
528 mike 1.1 return rns->id;
529 }
530
531 /* Not found so return null id */
532 return '\0';
533 }
534
|
535 krisbash 1.4 #define XML_NS "http://www.w3.org/XML/1998/namespace"
536 #define T_XML_NS T("http://www.w3.org/XML/1998/namespace")
537 #define XML_NS_LEN (sizeof(XML_NS) - 1)
538
539 #define XMLNS_NS "http://www.w3.org/2000/xmlns/"
540 #define T_XMLNS_NS T("http://www.w3.org/2000/xmlns/")
541 #define XMLNS_NS_LEN (sizeof(XMLNS_NS) - 1)
542
543 /* Map a prefix to its XML namespace
544 * A non-empty prefix that is unmapped results in an error */
545 static const XML_NameSpace* _FindNamespace(_Inout_ XML* self, _In_z_ const XML_Char* prefix)
546 {
547 static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
548 static const XML_NameSpace s_xml = {T("xml"), 3 ^ 'l', T_XML_NS, XML_NS_LEN, '\0', 0};
549 static const XML_NameSpace s_xmlns = {T("xmlns"), 5 ^ 's', T_XMLNS_NS, XMLNS_NS_LEN, '\0', 0};
550 unsigned int code = _HashCode(prefix, XML_strlen(prefix));
|
551 mike 1.1 size_t i;
552
|
553 krisbash 1.4 /* Special case: the 'xml' and 'xmlns' namespaces are fixed. */
554 if (prefix[0] == 'x' &&
555 prefix[1] == 'm' &&
556 prefix[2] == 'l')
557 {
558 if (prefix[3] == '\0')
559 return &s_xml;
560 else if (prefix[3] == 'n' &&
561 prefix[4] == 's' &&
562 prefix[5] == '\0')
563 return &s_xmlns;
564 }
|
565 mike 1.1
566 /* First check single entry cache */
567 if (self->nameSpacesCacheIndex != (size_t)-1)
568 {
|
569 krisbash 1.4 XML_NameSpace* ns;
570 _Analysis_assume_(self->nameSpacesCacheIndex < XML_MAX_NAMESPACES);
571 ns = &self->nameSpaces[self->nameSpacesCacheIndex];
|
572 mike 1.1
|
573 krisbash 1.4 if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
|
574 mike 1.1 {
|
575 krisbash 1.4 return ns;
|
576 mike 1.1 }
577 }
578
579 /* Translate name to the one found in the nameSpaces[] array */
580 for (i = self->nameSpacesSize; i--; )
581 {
|
582 krisbash 1.4 const XML_NameSpace* ns = &self->nameSpaces[i];
|
583 mike 1.1
|
584 krisbash 1.4 if (ns && ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
|
585 mike 1.1 {
586 /* Cache */
587 self->nameSpacesCacheIndex = i;
588
|
589 krisbash 1.4 return ns;
|
590 mike 1.1 }
591 }
592
|
593 krisbash 1.4 /* No match
594 * For the empty prefix this means there is no namespace
595 * Otherwise, this is an error */
596 if (prefix[0])
597 {
598 XML_Raise(self, XML_ERROR_UNDEFINED_NAMESPACE_PREFIX, tcs(prefix));
599 return NULL;
600 }
601
602 return &s_empty;
|
603 mike 1.1 }
604
605 static void _ParseAttr(
|
606 krisbash 1.4 _Inout_ XML* self,
607 _Inout_ XML_Elem* elem,
608 _Inout_ XMLCharPtr* pInOut)
609 {
610 _Null_terminated_ XML_Char* p = *pInOut;
611 XML_Char* name;
612 XML_Char* nameEnd;
613 XML_Char* value;
614 XML_Char* valueEnd;
615 XML_Char* colon = NULL;
616 XML_Char* tag = p;
617 XML_Char* prefix = T("");
618
619 if (!p)
620 {
621 XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME);
622 return;
623 }
|
624 mike 1.1
625 /* Parse the attribute name */
626 {
627 name = p;
628
|
629 krisbash 1.4 if (!*p || !_IsFirst(*p))
|
630 mike 1.1 {
|
631 krisbash 1.4 XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME);
|
632 mike 1.1 return;
633 }
634
635 p++;
636
|
637 krisbash 1.4 if (*p)
638 p = _SkipInner(p);
|
639 mike 1.1
640 if (*p == ':')
641 {
642 colon = p++;
643 p = _SkipInner(p);
644 }
645
646 nameEnd = p;
647 }
648
649 /* Seek the quote character (position p beyond quote) */
650 {
651 /* Skip spaces */
652 p = _SkipSpaces(self, p);
653
654 /* Expect a '=' character */
655 if (*p++ != '=')
656 {
|
657 krisbash 1.4 *nameEnd = '\0';
658 XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_EQUALS, tcs(name));
|
659 mike 1.1 return;
660 }
661 }
662
663 /* Null-terminate name now that we are beyond the '=' */
664 *nameEnd = '\0';
665
666 /* Skip spaces */
667 p = _SkipSpaces(self, p);
668
669 /* Parse the value */
670 {
|
671 krisbash 1.4 XML_Char quote;
|
672 mike 1.1
673 /* Expect opening quote */
674 if (*p != '"' && *p != '\'')
675 {
|
676 krisbash 1.4 XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_OPENING_QUOTES, tcs(name));
|
677 mike 1.1 return;
678 }
679
680 quote = *p++;
681 value = p;
682
683 valueEnd = _ReduceAttrValue(self, &p, quote);
684
685 if (self->status)
686 {
687 /* Propagate error */
688 return;
689 }
690
691 /* Expect closing quote */
692 if (*p++ != quote)
693 {
|
694 krisbash 1.4 XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_CLOSING_QUOTES, tcs(name));
|
695 mike 1.1 return;
696 }
697
698 /* Null-terminate the value */
699 *valueEnd = '\0';
700 }
701
702 /* Skip spaces */
703 p = _SkipSpaces(self, p);
704
|
705 krisbash 1.4 /* Separate the prefix and tag */
706 if (colon)
707 {
708 *colon = '\0';
709 tag = colon + 1;
710 prefix = name;
711 }
712 /* else prefix is "" and name is the tag */
713
714 /* If the first/only token is "xmlns", extract namespace */
|
715 mike 1.1 if (name[0] == 'x' &&
716 name[1] == 'm' &&
717 name[2] == 'l' &&
718 name[3] == 'n' &&
|
719 krisbash 1.4 name[4] == 's' &&
720 name[5] == '\0')
|
721 mike 1.1 {
|
722 krisbash 1.4 unsigned int tagCode = 0;
723
724 /* The namespace of the xmlns:x attribute is fixed */
725 prefix = T("xmlns");
726
727 if (colon)
|
728 mike 1.1 {
|
729 krisbash 1.4 /* For non-default namespaces */
730 tagCode = _HashCode(tag, nameEnd - tag);
|
731 mike 1.1 }
732
733 /* Add new namespace entry */
734 {
735 /* Check for stack overflow */
736 if (self->nameSpacesSize == XML_MAX_NAMESPACES)
737 {
|
738 krisbash 1.4 XML_Raise(self, XML_ERROR_TOO_MANY_NAMESPACES,
|
739 mike 1.1 (int)XML_MAX_NAMESPACES);
740 return;
741 }
742 {
|
743 krisbash 1.4 XML_NameSpace* newNs = &self->nameSpaces[self->nameSpacesSize++];
744 newNs->name = colon ? tag : T("");
745 newNs->nameCode = tagCode;
746 newNs->id = _FindNamespaceID(self, value, valueEnd - value);
747 newNs->uri = value;
748 newNs->uriSize = valueEnd - value;
749 newNs->depth = self->stackSize;
|
750 mike 1.1 }
751 }
752 }
753
754 /* Append attribute to element */
755 {
756 XML_Attr* attr;
757
758 /* Check for attribute array overflow */
759 if (elem->attrsSize == XML_MAX_ATTRIBUTES)
760 {
|
761 krisbash 1.4 elem->data.data[elem->data.size] = 0; //May not have been null termated yet
762 XML_Raise(self, XML_ERROR_TOO_MANY_ATTRIBUTES, tcs(elem->data.data), (int)XML_MAX_ATTRIBUTES);
|
763 mike 1.1 return;
764 }
765
766 attr = &elem->attrs[elem->attrsSize++];
|
767 krisbash 1.4 attr->name.data = tag;
768 attr->name.size = nameEnd - tag;
|
769 mike 1.1 attr->value = value;
|
770 krisbash 1.4 attr->valueSize = valueEnd - value;
771
772 /* Save the namespace prefix, which will be translated by the caller */
773 attr->name.namespaceUri = prefix;
|
774 mike 1.1 }
775
776 *pInOut = p;
777 }
778
779 static void _ParseProcessingInstruction(
|
780 krisbash 1.4 _Inout_ XML* self,
781 _Inout_ XML_Elem* elem,
782 _In_z_ XML_Char* p)
|
783 mike 1.1 {
784 /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */
|
785 krisbash 1.4 XML_Char* start;
786 XML_Char* end;
|
787 mike 1.1
788 /* Advance past '?' character */
|
789 krisbash 1.4 if (*p)
790 {
791 p++;
792 }
|
793 mike 1.1
794 /* Get tag identifier */
795 {
796 start = p;
797
|
798 krisbash 1.4 if (*p)
799 {
800 p = _SkipInner(p);
801 }
|
802 mike 1.1
803 if (*p == ':')
804 {
805 p++;
|
806 krisbash 1.4 if (*p)
807 {
808 p = _SkipInner(p);
809 }
|
810 mike 1.1 }
811
|
812 krisbash 1.4 /* If input exhausted */
|
813 mike 1.1 if (*p == '\0')
814 {
|
815 krisbash 1.4 XML_Raise(self, XML_ERROR_END_OF_XML_INSTRUCTION);
|
816 mike 1.1 return;
817 }
818
819 end = p;
820 }
821
822 /* Skip spaces */
|
823 krisbash 1.4 if (*p)
824 {
825 p = _SkipSpaces(self, p);
826 }
827
828 elem->type = XML_INSTRUCTION;
829 elem->data.data = start;
830 elem->data.size = end - start;
|
831 mike 1.1
832 /* Process attributes */
833
834 while (*p && *p != '?')
835 {
836 _ParseAttr(self, elem, &p);
837
838 if (self->status)
839 {
840 /* Propagate error */
841 return;
842 }
843 }
844
|
845 krisbash 1.4 if (*p)
846 {
847 p++;
848 }
|
849 mike 1.1
850 /* Skip spaces */
|
851 krisbash 1.4 if (*p)
852 {
853 p = _SkipSpaces(self, p);
854 }
|
855 mike 1.1
856 /* Expect '>' */
857 if (*p++ != '>')
858 {
|
859 krisbash 1.4 XML_Raise(self, XML_ERROR_END_OF_INSTRUCTION_MISSING);
|
860 mike 1.1 return;
861 }
862
863 /* Return element object */
|
864 krisbash 1.4 elem->data.namespaceUri = T("");
865 elem->data.namespaceUriSize = 0;
866 elem->data.namespaceId = '\0';
|
867 mike 1.1 *end = '\0';
868
869 self->ptr = p;
870
871 if (self->foundRoot)
872 self->state = STATE_CHARS;
873 else
874 self->state = STATE_START;
875 }
876
877 static void _ParseStartTag(
|
878 krisbash 1.4 _Inout_ XML* self,
879 _Inout_ XML_Elem* elem,
880 _In_z_ XML_Char* p)
881 {
882 XML_Char* name;
883 XML_Char* nameEnd;
884 XML_Char* colon = NULL;
885 XML_Char* prefix = T("");
886 const XML_NameSpace* ns;
887 size_t attr;
|
888 mike 1.1
889 /* Found the root */
890 self->foundRoot = 1;
891
892 /* Get tag identifier */
893 {
894 name = p;
895
|
896 krisbash 1.4 if (!*p || !_IsFirst(*p++))
|
897 mike 1.1 {
|
898 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED);
|
899 mike 1.1 return;
900 }
901
|
902 krisbash 1.4 if (*p)
903 {
904 p = _SkipInner(p);
905 }
|
906 mike 1.1
907 if (*p == ':')
908 {
909 colon = p++;
|
910 krisbash 1.4 if (*p)
911 {
912 p = _SkipInner(p);
913 }
|
914 mike 1.1 }
915
916 /* If input exhuasted */
917 if (*p == '\0')
918 {
|
919 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END);
|
920 mike 1.1 return;
921 }
922
923 nameEnd = p;
924 }
925
|
926 krisbash 1.4 if (colon)
927 {
928 *colon = '\0';
929 prefix = name;
930 name = colon + 1;
931 }
932
|
933 mike 1.1 /* Skip spaces */
934 p = _SkipSpaces(self, p);
935
|
936 krisbash 1.4 elem->type = XML_START;
937 elem->data.data = name;
938 elem->data.size = nameEnd - name;
939
|
940 mike 1.1 /* Process attributes */
941 while (*p && *p != '/' && *p != '>')
942 {
943 _ParseAttr(self, elem, &p);
944
945 if (self->status)
946 return;
947 }
948
|
949 krisbash 1.4 /* Translate the namespace after parsing xmlns attributes */
950 ns = _FindNamespace(self, prefix);
951
952 if (self->status)
953 return;
954
955 /* Now translate the attribute namespaces */
956 /* Unprefixed attributes get a empty namespace */
957 for (attr = 0; attr < elem->attrsSize; attr++)
958 {
959 static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
960 XML_Attr* item = elem->attrs + attr;
961 const XML_NameSpace* itemNS = &s_empty;
962
963 if (item->name.namespaceUri[0] != '\0')
964 {
965 /* The namespaceUri field contains the prefix */
966 itemNS = _FindNamespace(self, item->name.namespaceUri);
967
968 if (self->status)
969 return;
970 krisbash 1.4 }
971
972 item->name.namespaceUri = itemNS->uri;
973 item->name.namespaceUriSize = itemNS->uriSize;
974 item->name.namespaceId = itemNS->id;
975 }
976
977 /* Create the element */
978 elem->type = XML_START;
979 elem->data.data = name;
980 elem->data.size = nameEnd - name;
981 elem->data.namespaceUri = ns->uri;
982 elem->data.namespaceUriSize = ns->uriSize;
983 elem->data.namespaceId = ns->id;
984
|
985 mike 1.1 /* Check for empty tag */
986 if (*p == '/')
987 {
988 p++;
989
990 /* Null-terminate the tag */
991 *nameEnd = '\0';
992
993 /* Inject an empty tag onto element stack */
994 {
995 /* Check for stack overflow */
996 if (self->elemStackSize == XML_MAX_NESTED)
997 {
|
998 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW,
999 (int)XML_MAX_NESTED);
|
1000 mike 1.1 return;
1001 }
1002
1003 self->elemStack[self->elemStackSize] = *elem;
1004 self->elemStack[self->elemStackSize].type = XML_END;
1005 self->elemStackSize++;
1006 self->nesting++;
1007 }
1008
1009 /* Skip space */
|
1010 krisbash 1.4 if (*p)
1011 {
1012 p = _SkipSpaces(self, p);
1013 }
|
1014 mike 1.1
1015 /* Expect '>' */
1016 if (*p++ != '>')
1017 {
|
1018 krisbash 1.4 *nameEnd = '\0';
1019 XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data));
|
1020 mike 1.1 return;
1021 }
1022
1023 self->ptr = p;
1024 self->state = STATE_CHARS;
1025 return;
1026 }
1027
1028 /* Expect '>' */
1029 if (*p++ != '>')
1030 {
|
1031 krisbash 1.4 *nameEnd = '\0';
1032 XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data));
|
1033 mike 1.1 return;
1034 }
1035
1036 /* Zero-terminate the name tag */
1037 *nameEnd = '\0';
1038
1039 /* Push opening tag */
1040 {
1041 if (self->stackSize == XML_MAX_NESTED)
1042 {
|
1043 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW,
1044 (int)XML_MAX_NESTED);
|
1045 mike 1.1 return;
1046 }
1047
|
1048 krisbash 1.4 self->stack[self->stackSize] = elem->data;
|
1049 mike 1.1 self->stackSize++;
1050 self->nesting++;
1051 }
1052
1053 self->ptr = p;
1054
1055 if (self->foundRoot)
1056 self->state = STATE_CHARS;
1057 else
1058 self->state = STATE_START;
1059 }
1060
1061 static void _ParseEndTag(
|
1062 krisbash 1.4 _Inout_ XML* self,
1063 _Inout_ XML_Elem* elem,
1064 _In_z_ XML_Char* p)
|
1065 mike 1.1 {
1066 /* Closing element: </name> */
|
1067 krisbash 1.4 XML_Char* name;
1068 XML_Char* nameEnd;
1069 XML_Char* colon = NULL;
1070 XML_Char* prefix = T("");
1071 const XML_NameSpace *ns;
|
1072 mike 1.1
|
1073 krisbash 1.4 if (*p)
1074 {
1075 p++;
1076 }
|
1077 mike 1.1
1078 /* Skip space */
|
1079 krisbash 1.4 if (*p)
1080 {
1081 p = _SkipSpaces(self, p);
1082 }
|
1083 mike 1.1
1084 name = p;
1085
1086 /* Skip name */
1087 {
|
1088 krisbash 1.4 if (!*p || !_IsFirst(*p++))
|
1089 mike 1.1 {
|
1090 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED_ELEM_END);
|
1091 mike 1.1 return;
1092 }
1093
|
1094 krisbash 1.4 if (*p)
1095 {
1096 p = _SkipInner(p);
1097 }
|
1098 mike 1.1
1099 if (*p == ':')
1100 {
1101 colon = p++;
|
1102 krisbash 1.4 if (*p)
1103 {
1104 p = _SkipInner(p);
1105 }
|
1106 mike 1.1 }
1107 }
1108
1109 /* If input exhuasted */
1110 if (*p == '\0')
1111 {
|
1112 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END_ELEM_END);
|
1113 mike 1.1 return;
1114 }
1115
1116 nameEnd = p;
1117
|
1118 krisbash 1.4 if (colon)
1119 {
1120 *colon = '\0';
1121 prefix = name;
1122 name = colon + 1;
1123 }
1124
|
1125 mike 1.1 /* Skip spaces */
1126 p = _SkipSpaces(self, p);
1127
1128 /* Expect '>' */
1129 if (*p++ != '>')
1130 {
|
1131 krisbash 1.4 XML_Raise(self,XML_ERROR_ELEMENT_NAME_NOT_CLOSED_ELEM_END, tcs(name));
|
1132 mike 1.1 return;
1133 }
1134
1135 /* Null terminate name */
1136 *nameEnd = '\0';
1137
|
1138 krisbash 1.4 ns = _FindNamespace(self, prefix);
1139
1140 if (self->status)
1141 return;
|
1142 mike 1.1
1143 /* Return element object */
1144 elem->type = XML_END;
|
1145 krisbash 1.4 elem->data.data = name;
1146 elem->data.size = nameEnd - name;
1147 elem->data.namespaceUri = ns->uri;
1148 elem->data.namespaceUriSize = ns->uriSize;
1149 elem->data.namespaceId = ns->id;
|
1150 mike 1.1
1151 /* Match opening name */
1152 {
1153 /* Check for stack underflow */
1154 if (self->stackSize-- == 0)
1155 {
|
1156 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_TOO_MANY_ENDS, tcs(name));
|
1157 mike 1.1 return;
1158 }
1159
1160 self->nesting--;
1161
1162 /* Check that closing name matches opening name */
1163 {
1164 XML_Name* xn = &self->stack[self->stackSize];
1165
|
1166 krisbash 1.4 if (XML_strcmp(xn->data, name) != 0 ||
1167 xn->namespaceId != ns->id ||
1168 (ns->id == 0 && XML_strcmp(xn->namespaceUri, ns->uri) != 0))
|
1169 mike 1.1 {
|
1170 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG,
1171 tcs(self->stack[self->stackSize].data), tcs(name));
|
1172 mike 1.1 return;
1173 }
1174 }
1175 }
1176
1177 /* Remove namespaces that have just gone out of scope */
1178 {
1179 size_t i;
1180 size_t n = 0;
1181
1182 for (i = self->nameSpacesSize; i--; )
1183 {
1184 if (self->nameSpaces[i].depth >= self->stackSize)
1185 n++;
1186 }
1187
1188 if (n)
1189 {
1190 self->nameSpacesSize -= n;
1191
1192 /* Clear single-entry cache */
1193 mike 1.1 if (self->nameSpacesCacheIndex >= self->nameSpacesSize)
1194 self->nameSpacesCacheIndex = (size_t)-1;
1195 }
1196 }
1197
1198 /* Set next state */
1199 self->ptr = p;
1200 self->state = STATE_CHARS;
1201 }
1202
1203 static void _ParseComment(
|
1204 krisbash 1.4 _Inout_ XML* self,
1205 _Inout_ XML_Elem* elem,
1206 _In_z_ XML_Char* p)
|
1207 mike 1.1 {
1208 /* Comment: <!-- blah blah blah --> */
|
1209 krisbash 1.4 XML_Char* start;
1210 XML_Char* end;
|
1211 mike 1.1
|
1212 krisbash 1.4 p = _SkipChars(p, 2);
1213 if (!*p)
1214 {
1215 XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END);
1216 return;
1217 }
|
1218 mike 1.1 start = p;
1219
1220 while (*p)
1221 {
1222 if (p[0] == '-' && p[1] == '-')
1223 {
1224 if (p[2] != '>')
1225 {
|
1226 krisbash 1.4 XML_Raise(self, XML_ERROR_COMMENT_END_EXPECTED);
|
1227 mike 1.1 return;
1228 }
1229
1230 /* Null-terminate this comment */
1231 end = p;
1232 p += 3;
1233
1234 /* Prepare element */
1235 elem->type = XML_COMMENT;
|
1236 krisbash 1.4 elem->data.data = start;
1237 elem->data.size = end - start;
1238 elem->data.namespaceUri = T("");
1239 elem->data.namespaceUriSize = 0;
1240 elem->data.namespaceId = '\0';
|
1241 mike 1.1 *end = '\0';
1242
1243 /* Set next state */
1244 self->ptr = p;
1245
1246 if (self->foundRoot)
1247 self->state = STATE_CHARS;
1248 else
1249 self->state = STATE_START;
1250
1251 return;
1252 }
1253 else if (p[0] == '\n')
1254 self->line++;
1255
1256 p++;
1257 }
1258
|
1259 krisbash 1.4 XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END);
|
1260 mike 1.1 }
1261
1262 static void _ParseCDATA(
|
1263 krisbash 1.4 _Inout_ XML* self,
1264 _Inout_ XML_Elem* elem,
1265 _In_z_ XML_Char* p)
|
1266 mike 1.1 {
1267 /* <![CDATA[...]]> */
|
1268 krisbash 1.4 XML_Char* start;
1269 XML_Char* end;
|
1270 mike 1.1
|
1271 krisbash 1.4 p = _SkipChars(p, 7);
1272 if (!*p)
1273 {
1274 XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END);
1275 return;
1276 }
|
1277 mike 1.1 start = p;
1278
1279 while (*p)
1280 {
1281 if (p[0] == ']' && p[1] == ']' && p[2] == '>')
1282 {
1283 end = p;
1284 p += 3;
1285
1286 /* Prepare element */
1287 elem->type = XML_CHARS;
|
1288 krisbash 1.4 elem->data.data = start;
1289 elem->data.size = end - start;
1290 elem->data.namespaceUri = T("");
1291 elem->data.namespaceUriSize = 0;
1292 elem->data.namespaceId = '\0';
|
1293 mike 1.1 *end = '\0';
1294
1295 /* Set next state */
1296 self->ptr = p;
1297 self->state = STATE_CHARS;
1298
1299 return;
1300
1301 }
1302 else if (p[0] == '\n')
1303 self->line++;
1304
1305 p++;
1306 }
1307
|
1308 krisbash 1.4 XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END);
|
1309 mike 1.1 return;
1310 }
1311
1312 static void _ParseDOCTYPE(
|
1313 krisbash 1.4 _Inout_ XML* self,
1314 _Inout_ XML_Elem* elem,
1315 _In_z_ XML_Char* p)
|
1316 mike 1.1 {
1317 MI_UNUSED(elem);
1318
1319 /* Recognize <!DOCTYPE ...> */
|
1320 krisbash 1.4 p = _SkipChars(p, 7);
1321 if (!*p)
1322 {
1323 XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END);
1324 return;
1325 }
|
1326 mike 1.1
1327 while (*p && *p != '>')
1328 {
1329 if (p[0] == '\n')
1330 self->line++;
1331
1332 p++;
1333 }
1334
1335 if (*p++ != '>')
1336 {
|
1337 krisbash 1.4 XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END);
|
1338 mike 1.1 return;
1339 }
1340
1341 /* Set next state */
1342 self->ptr = p;
1343
1344 if (self->foundRoot)
1345 self->state = STATE_CHARS;
1346 else
1347 self->state = STATE_START;
1348 }
1349
1350 static int _ParseCharData(
|
1351 krisbash 1.4 _Inout_ XML* self,
1352 _Inout_ XML_Elem* elem,
1353 _In_z_ XML_Char* p)
|
1354 mike 1.1 {
|
1355 krisbash 1.4 XML_Char* start;
1356 XML_Char* end;
|
1357 mike 1.1
1358 /* Reject input if it does appear inside tags */
1359 if (self->stackSize == 0)
1360 {
|
1361 krisbash 1.4 /* Finished parsing document */
1362 self->status = 1;
1363 self->ptr = p;
|
1364 mike 1.1 return 0;
1365 }
1366
1367 if (*p == '<')
1368 {
1369 self->ptr = p + 1;
1370 self->state = STATE_TAG;
1371 return 0;
1372 }
1373
1374 /* Save pointer to start of data */
1375 start = p;
1376
1377 /* reduce character data */
1378 end = _ReduceCharData(self, &p);
1379
1380 if (self->status)
1381 {
1382 /* Propagate error */
1383 return 0;
1384 }
1385 mike 1.1
1386 /* Process character data */
1387 if (*p != '<')
1388 {
|
1389 krisbash 1.4 XML_Raise(self, XML_ERROR_CHARDATA_EXPECTED_ELEMENT_END_TAG);
|
1390 mike 1.1 return 0;
1391 }
1392
1393 /* Set next state */
1394 self->ptr = p + 1;
1395 self->state = STATE_TAG;
1396
1397 /* Return character data element if non-empty */
1398 if (end == start)
1399 return 0;
1400
1401 /* Prepare element */
1402 *end = '\0';
1403 elem->type = XML_CHARS;
|
1404 krisbash 1.4 elem->data.data = start;
1405 elem->data.size = end - start;
1406 elem->data.namespaceUri = T("");
1407 elem->data.namespaceUriSize = 0;
1408 elem->data.namespaceId = '\0';
|
1409 mike 1.1
1410 /* Return 1 to indicate non-empty element */
1411 return 1;
1412 }
1413
1414 /*
1415 **==============================================================================
1416 **
1417 ** Public definitions
1418 **
1419 **==============================================================================
1420 */
1421
|
1422 krisbash 1.4 const XML_Char* XML_Elem_GetAttr(
1423 _Inout_ XML_Elem* self,
1424 XML_Char nsId,
1425 _In_z_ const XML_Char* name)
|
1426 mike 1.1 {
1427 size_t i;
1428
1429 for (i = 0; i < self->attrsSize; i++)
1430 {
|
1431 krisbash 1.4 if (nsId == self->attrs[i].name.namespaceId &&
1432 XML_strcmp(name, self->attrs[i].name.data) == 0)
|
1433 mike 1.1 return self->attrs[i].value;
1434 }
1435
1436 /* Not found! */
1437 return NULL;
1438 }
1439
1440 void XML_Init(
|
1441 krisbash 1.4 _Out_ XML* self)
|
1442 mike 1.1 {
1443 memset(self, 0, sizeof(XML));
1444
1445 self->nameSpacesCacheIndex = (size_t)-1;
1446 }
1447
1448 void XML_SetText(
|
1449 krisbash 1.4 _Inout_ XML* self,
1450 _In_z_ XML_Char* text)
|
1451 mike 1.1 {
1452 self->text = text;
1453 self->ptr = text;
1454 self->line = 1;
1455 self->state = STATE_START;
1456 }
1457
1458 int XML_Next(
|
1459 krisbash 1.4 _Inout_ XML* self,
1460 _Out_ XML_Elem* elem)
|
1461 mike 1.1 {
1462 if (self->elemStackSize)
1463 {
1464 *elem = self->elemStack[--self->elemStackSize];
1465 self->nesting--;
1466 return 0;
1467 }
1468
1469 elem->attrsSize = 0;
1470
1471 for (;;)
1472 {
1473 switch (self->state)
1474 {
1475 case STATE_START:
1476 {
|
1477 krisbash 1.4 XML_Char* p = self->ptr;
|
1478 mike 1.1
1479 /* Skip spaces */
1480 p = _SkipSpaces(self, p);
1481
1482 /* Expect '<' */
1483 if (*p != '<')
1484 {
|
1485 krisbash 1.4 XML_Raise(self, XML_ERROR_OPEN_ANGLE_BRACKET_EXPECTED);
|
1486 mike 1.1 return -1;
1487 }
1488
1489 self->ptr = p + 1;
1490 self->state = STATE_TAG;
1491 break;
1492 }
1493 case STATE_TAG:
1494 {
|
1495 krisbash 1.4 XML_Char* p = self->ptr;
|
1496 mike 1.1
1497 /* Skip spaces */
1498 p = _SkipSpaces(self, p);
1499
1500 /* Expect one of these */
1501 if (*p == '/')
1502 {
1503 _ParseEndTag(self, elem, p);
1504 return self->status;
1505 }
1506 else if (_IsFirst(*p))
1507 {
1508 _ParseStartTag(self, elem, p);
1509 return self->status;
1510 }
1511 else if (*p == '?')
1512 {
1513 _ParseProcessingInstruction(self, elem, p);
1514 return self->status;
1515 }
1516 else if (*p == '!')
1517 mike 1.1 {
1518 p++;
1519
1520 if (p[0] == '-' && p[1] == '-')
1521 {
1522 _ParseComment(self, elem, p);
1523 return self->status;
1524 }
|
1525 krisbash 1.4 else if (
1526 memcmp(p, T("[CDATA["), 7 * sizeof(XML_Char)) == 0)
|
1527 mike 1.1 {
1528 _ParseCDATA(self, elem, p);
1529 return self->status;
1530 }
|
1531 krisbash 1.4 else if (
1532 memcmp(p, T("DOCTYPE"), 7 * sizeof(XML_Char)) == 0)
|
1533 mike 1.1 {
1534 _ParseDOCTYPE(self, elem, p);
1535
1536 if (self->status)
1537 return -1;
1538
1539 break;
1540 }
1541 else
1542 {
|
1543 krisbash 1.4 XML_Raise(self, XML_ERROR_COMMENT_CDATA_DOCTYPE_EXPECTED);
|
1544 mike 1.1 return -1;
1545 }
1546 }
1547 else
1548 {
|
1549 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_EXPECTED);
|
1550 mike 1.1 return-1;
1551 }
1552 break;
1553 }
1554 case STATE_CHARS:
1555 {
|
1556 krisbash 1.4 XML_Char* p = self->ptr;
|
1557 mike 1.1
1558 if (_ParseCharData(self, elem, p) == 1)
1559 {
1560 /* Return character data to caller */
1561 return 0;
1562 }
1563
1564 if (self->status)
1565 return self->status;
1566
1567 /* empty character data */
1568 break;
1569 }
1570 default:
1571 {
|
1572 krisbash 1.4 XML_Raise(self, XML_ERROR_UNEXPECTED_STATE);
|
1573 mike 1.1 return -1;
1574 }
1575 }
1576 }
1577
1578 //return 0;
1579 }
1580
1581 int XML_Expect(
|
1582 krisbash 1.4 _Inout_ XML* self,
1583 _Out_ XML_Elem* elem,
|
1584 mike 1.1 XML_Type type,
|
1585 krisbash 1.4 XML_Char nsId,
1586 _In_z_ const XML_Char* name)
|
1587 mike 1.1 {
1588 if (XML_Next(self, elem) == 0 &&
1589 elem->type == type &&
|
1590 krisbash 1.4 nsId == elem->data.namespaceId &&
1591 (!name || XML_strcmp(elem->data.data, name) == 0))
|
1592 mike 1.1 {
1593 return 0;
1594 }
1595
1596 if (type == XML_START)
|
1597 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED,
1598 tcs(name), tcs(elem->data.data));
|
1599 mike 1.1 else if (type == XML_END)
|
1600 krisbash 1.4 XML_Raise(self, XML_ERROR_SPECIFIC_END_ELEMENT_EXPECTED,
1601 tcs(name), tcs(elem->data.data));
|
1602 mike 1.1 else if (type == XML_CHARS)
|
1603 krisbash 1.4 XML_Raise(self, XML_ERROR_CHARACTER_DATA_EXPECTED);
|
1604 mike 1.1
1605 return -1;
1606 }
1607
1608 int XML_Skip(
|
1609 krisbash 1.4 _Inout_ XML* self)
|
1610 mike 1.1 {
1611 XML_Elem tmp;
1612 size_t nesting = self->nesting;
1613
1614 while (self->nesting >= nesting)
1615 {
1616 if (XML_Next(self, &tmp) != 0)
1617 return -1;
1618 }
1619
1620 return 0;
1621 }
1622
1623 int XML_RegisterNameSpace(
|
1624 krisbash 1.4 _Inout_ XML* self,
1625 XML_Char id,
1626 _In_z_ const XML_Char* uri)
|
1627 mike 1.1 {
1628 XML_RegisteredNameSpace rns;
1629 /* ATTN: we do not check for duplicates */
1630
1631 /* Reject out of range ids */
1632 if (id < 'a' || id > 'z')
1633 return -1;
1634
1635 /* Check for overflow of the array */
1636 if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES)
1637 return -1;
1638
1639 rns.id = id;
1640 rns.uri = uri;
|
1641 krisbash 1.4 rns.uriCode = _HashCode(uri, XML_strlen(uri));
|
1642 mike 1.1
1643 self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns;
1644
1645 return 0;
1646 }
1647
1648 int XML_PutBack(
|
1649 krisbash 1.4 _Inout_ XML* self,
1650 _In_ const XML_Elem* elem)
|
1651 mike 1.1 {
1652 /* Check for stack overflow */
1653 if (self->elemStackSize == XML_MAX_NESTED)
1654 {
|
1655 krisbash 1.4 XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW,
1656 (int)XML_MAX_NESTED);
|
1657 mike 1.1 return -1;
1658 }
1659
1660 self->elemStack[self->elemStackSize++] = *elem;
1661 return 0;
1662 }
1663
|
1664 krisbash 1.4 #if defined(_MSC_VER)
1665 void XML_Raise(_Inout_ XML* self, unsigned formatStringId, ...)
|
1666 mike 1.1 {
|
1667 krisbash 1.4 HMODULE hModule;
1668 XML_Char formatMsg[MAX_PATH];
1669 va_list ap;
1670 memset(&ap, 0, sizeof(ap));
1671
1672 self->status = -1;
1673 self->message[0] = '\0';
1674
1675 if (GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCTSTR ) XML_Raise, &hModule) == 0)
1676 {
1677 memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1678 }
1679 else
1680 {
1681 if(LoadStringW(hModule, formatStringId, formatMsg, MAX_PATH))
1682 {
1683 va_start(ap, formatStringId);
1684 if (FormatMessageW(FORMAT_MESSAGE_FROM_STRING, formatMsg, 0, 0, self->message, sizeof(self->message)/sizeof(self->message[0]), &ap) == 0)
1685 {
1686 memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1687 }
1688 krisbash 1.4 va_end(ap);
1689 }
1690 else
1691 {
1692 memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1693 }
1694 }
|
1695 mike 1.1 }
|
1696 krisbash 1.4 #else
|
1697 mike 1.1
|
1698 krisbash 1.4 void XML_Raise(XML* self, _In_z_ const XML_Char* format, ...)
|
1699 mike 1.1 {
1700 va_list ap;
1701
1702 self->status = -1;
1703 self->message[0] = '\0';
1704
|
1705 krisbash 1.4 memset(&ap, 0, sizeof(ap));
|
1706 mike 1.1 va_start(ap, format);
|
1707 krisbash 1.4 Vstprintf(self->message, MI_COUNT(self->message), format, ap);
|
1708 mike 1.1 va_end(ap);
1709 }
|
1710 krisbash 1.4 #endif
|
1711 mike 1.1
|
1712 krisbash 1.4 void XML_FormatError(_Inout_ XML* self, _Out_writes_z_(size) XML_Char* buffer, size_t size)
|
1713 mike 1.1 {
|
1714 krisbash 1.4 *buffer = '\0';
|
1715 mike 1.1
1716 if (self->status == -1)
|
1717 krisbash 1.4 {
1718 Stprintf(
1719 buffer,
1720 size,
1721 PAL_T("%u: error: %T"),
1722 (unsigned int)self->line,
1723 tcs(self->message));
1724 }
1725 }
1726
1727
1728 int XML_StripWhitespace(
1729 _Inout_ XML_Elem* elem)
1730 {
1731 if (elem->type != XML_CHARS)
1732 {
1733 return -1;
1734 }
1735
1736 //Strip leading white space
1737 while (elem->data.size && _IsSpace(*elem->data.data))
1738 krisbash 1.4 {
1739 elem->data.data++;
1740 elem->data.size--;
1741 }
1742 //Strip trailing white space
1743 while(elem->data.size && _IsSpace(elem->data.data[elem->data.size-1]))
1744 {
1745 elem->data.data[elem->data.size-1] = T('\0');
1746 elem->data.size--;
1747 }
1748 return 0;
|
1749 mike 1.1 }
|