(file) Return to xml.c CVS log (file) (dir) Up to [OMI] / omi / xml

   1 mike  1.1 /*
   2           **==============================================================================
   3           **
   4           ** Open Management Infrastructure (OMI)
   5           **
   6           ** Copyright (c) Microsoft Corporation
   7           ** 
   8           ** Licensed under the Apache License, Version 2.0 (the "License"); you may not 
   9           ** use this file except in compliance with the License. You may obtain a copy 
  10           ** of the License at 
  11           **
  12           **     http://www.apache.org/licenses/LICENSE-2.0 
  13           **
  14           ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15           ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 
  16           ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 
  17           ** MERCHANTABLITY OR NON-INFRINGEMENT. 
  18           **
  19           ** See the Apache 2 License for the specific language governing permissions 
  20           ** and limitations under the License.
  21           **
  22 mike  1.1 **==============================================================================
  23           */
  24           
  25           #include <common.h>
  26           #include "xml.h"
  27           #include <string.h>
  28           #include <stdio.h>
  29           #include <stdlib.h>
  30           #include <stdarg.h>
  31           #include <ctype.h>
  32 krisbash 1.4 #include <pal/format.h>
  33 mike     1.1 
  34              #if defined(_MSC_VER)
  35              /* PreFast - reviewed and believed to be false-positive*/
  36              
  37              /* warning C6385: Invalid data: accessing '??', the readable size is 'x' bytes, but 'y' bytes might be read: Lines: ... */
  38              # pragma warning(disable : 6385)
  39              /* warning C6386: Buffer overrun: accessing 'self->registeredNameSpaces' ... */
  40              # pragma warning(disable : 6386)
  41              
  42              #endif /* _MSC_VER */
  43              
  44 krisbash 1.4 #if defined(CONFIG_ENABLE_WCHAR)
  45              # define T(STR) L##STR
  46              # define XML_strtoul wcstoul
  47              # define XML_strcmp wcscmp
  48              # define XML_strlen wcslen
  49              #else
  50              # define T(STR) STR
  51              # define T(STR) STR
  52              # define XML_strtoul strtoul
  53              # define XML_strcmp strcmp
  54              # define XML_strlen strlen
  55              #endif
  56              
  57 mike     1.1 /*
  58              **==============================================================================
  59              **
  60              ** Local definitions
  61              **
  62              **==============================================================================
  63              */
  64              
  65              typedef enum _XML_State
  66              {
  67                  STATE_START,
  68                  STATE_TAG,
  69                  STATE_CHARS,
  70              }
  71              XML_State;
  72              
  73              /* Space characters include [\n\t\r ]
  74               *     _spaceChar['\n'] => 1
  75               *     _spaceChar['\r'] => 2
  76               *     _spaceChar['\t'] => 2
  77               *     _spaceChar[' '] => 2
  78 mike     1.1  */
  79 krisbash 1.4 static const unsigned char _spaceChar[256] =
  80 mike     1.1 {
  81                  0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  82                  2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  83                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  84                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  85                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  86                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  87                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  88                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  89              };
  90              
  91 krisbash 1.4 INLINE int _IsSpace(XML_Char c)
  92 mike     1.1 {
  93 krisbash 1.4 #if defined(CONFIG_ENABLE_WCHAR)
  94                  if (c >= 0 && c < 256)
  95                      return _spaceChar[(unsigned char)c];
  96                  else
  97                      return 0;
  98              #else
  99                  return _spaceChar[(unsigned char)c];
 100              #endif
 101 mike     1.1 }
 102              
 103              /* Matches XML name characters of the form: [A-Za-z_][A-Za-z0-9_-.:]*
 104               *     _nameChar[A-Za-z_] => 2 (first character)
 105               *     _nameChar[A-Za-z0-9_-.:] => 1 or 2 (inner character)
 106               */
 107 krisbash 1.4 static const unsigned char _nameChar[256] =
 108 mike     1.1 {
 109                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 110                  0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
 111                  0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,2,
 112                  0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
 113                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 114                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 115                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 116                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 117              };
 118              
 119 krisbash 1.4 /*
 120                  Skips N characters while checking that end of the string has not been hit
 121              */
 122              INLINE XML_Char * _SkipChars(_In_z_ XML_Char* p, size_t count)
 123 mike     1.1 {
 124 krisbash 1.4     size_t i;
 125                  for (i = 0; i < count; ++i)
 126                  {
 127                      if (*p == 0)
 128                      {
 129                          return NULL;
 130                      }
 131              
 132                      p++;
 133                  }
 134              
 135                  return p;
 136 mike     1.1 }
 137              
 138 krisbash 1.4 INLINE int _IsFirst(XML_Char c)
 139 mike     1.1 {
 140 krisbash 1.4 #if defined(CONFIG_ENABLE_WCHAR)
 141                  if (c >= 0 && c < 256)
 142                      return _nameChar[(unsigned char)c] & 2;
 143                  else
 144                      return 0;
 145              #else
 146                  return _nameChar[(unsigned char)c] & 2;
 147              #endif
 148              }
 149              
 150              INLINE int _IsInner(XML_Char c)
 151              {
 152              #if defined(CONFIG_ENABLE_WCHAR)
 153                  if (c >= 0 && c < 256)
 154                      return _nameChar[(unsigned char)c];
 155                  else
 156                      return 0;
 157              #else
 158 mike     1.1     return _nameChar[(unsigned char)c];
 159 krisbash 1.4 #endif
 160 mike     1.1 }
 161              
 162 krisbash 1.4 INLINE XML_Char* _SkipInner(_In_z_ XML_Char* p)
 163 mike     1.1 {
 164 krisbash 1.4     while (*p && _IsInner(*p))
 165 mike     1.1         p++;
 166              
 167                  return p;
 168              }
 169              
 170 krisbash 1.4 static XML_Char* _SkipSpacesAux(_Inout_ XML* self, _In_z_ XML_Char* p)
 171 mike     1.1 {
 172 krisbash 1.4     XML_UChar x;
 173 mike     1.1     size_t n = 0;
 174              
 175 krisbash 1.4     while (*p && (x = (XML_UChar)_IsSpace(*p)) != 0)
 176 mike     1.1     {
 177                      n += 0x01 & x;
 178                      p++;
 179                  }
 180              
 181                  self->line += n;
 182                  return p;
 183              }
 184              
 185 krisbash 1.4 INLINE XML_Char* _SkipSpaces(_Inout_ XML* self, _In_z_ XML_Char* p)
 186 mike     1.1 {
 187 krisbash 1.4     if (!p[0] || !_IsSpace(p[0]))
 188 mike     1.1         return p;
 189              
 190                  if (p[0] == '\n')
 191                      self->line++;
 192              
 193 krisbash 1.4     if (!p[1] || !_IsSpace(p[1]))
 194 mike     1.1         return &p[1];
 195              
 196                  if (p[1] == '\n')
 197                      self->line++;
 198              
 199 krisbash 1.4     if (!p[2] || !_IsSpace(p[2]))
 200 mike     1.1         return &p[2];
 201              
 202                  if (p[2] == '\n')
 203                      self->line++;
 204              
 205                  return _SkipSpacesAux(self, &p[3]);
 206              }
 207              
 208 krisbash 1.4 INLINE XML_Char* _ToEntityRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
 209 mike     1.1 {
 210                  /* Note: we collected the following statistics on the frequency of
 211                   * each entity reference in a large body of XML documents:
 212                   *     
 213                   *     &quot; - 74,480 occurences
 214                   *     &apos; - 13,877 occurences
 215                   *     &lt;   -  9,919 occurences
 216                   *     &gt;   -  9,853 occurences
 217                   *     &amp;  -    111 occurences
 218                   *
 219                   * The cases below are organized in order of statistical frequency.
 220                   */
 221              
 222                  /* Match one of these: "lt;", "gt;", "amp;", "quot;", "apos;" */
 223              
 224                  if (p[0] == 'q' && p[1] == 'u' && p[2] == 'o' && p[3] == 't' && p[4] == ';')
 225                  {
 226                      *ch = '"';
 227                      return p + 5;
 228                  }
 229              
 230 mike     1.1     if (p[0] == 'a' && p[1] == 'p' && p[2] == 'o' && p[3] == 's' && p[4] == ';')
 231                  {
 232                      *ch = '\'';
 233                      return p + 5;
 234                  }
 235              
 236                  if (p[0] == 'l' && p[1] == 't' && p[2] == ';')
 237                  {
 238                      *ch = '<';
 239                      return p + 3;
 240                  }
 241              
 242                  if (p[0] == 'g' && p[1] == 't' && p[2] == ';')
 243                  {
 244                      *ch = '>';
 245                      return p + 3;
 246                  }
 247              
 248                  if (p[0] == 'a' && p[1] == 'm' && p[2] == 'p' && p[3] == ';')
 249                  {
 250                      *ch = '&';
 251 mike     1.1         return p + 4;
 252                  }
 253              
 254 krisbash 1.4     *ch = 0;
 255                  XML_Raise(self, XML_ERROR_BAD_ENTITY_REFERENCE);
 256 mike     1.1     return p;
 257              }
 258              
 259 krisbash 1.4 INLINE XML_Char* _ToCharRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
 260 mike     1.1 {
 261 krisbash 1.4     XML_Char* end = NULL;
 262 mike     1.1     unsigned long x;
 263              
 264                  if (*p == 'x')
 265                  {
 266                      p++;
 267 krisbash 1.4         x = XML_strtoul(p, &end, 16);
 268 mike     1.1     }
 269                  else
 270                  {
 271 krisbash 1.4         x = XML_strtoul(p, &end, 10);
 272 mike     1.1     }
 273              
 274                  if (end == p || *end != ';' || x > 255)
 275                  {
 276                      *ch = '\0';
 277 krisbash 1.4         XML_Raise(self, XML_ERROR_BAD_CHARACTER_REFERENCE);
 278 mike     1.1         return p;
 279                  }
 280              
 281 krisbash 1.4     *ch = (XML_Char)x;
 282 mike     1.1 
 283                  return end + 1;
 284              }
 285              
 286 krisbash 1.4 INLINE XML_Char* _ToRef(_Inout_ XML* self, _In_z_ XML_Char* p, _Out_ XML_Char* ch)
 287 mike     1.1 {
 288                  /* Examples:
 289                   *     &#64;
 290                   *     &xFF;
 291                   *     &amp;
 292                   *     &lt;
 293                   */
 294                  if (*p == '#')
 295                      return _ToCharRef(self, p + 1, ch);
 296                  else
 297                      return _ToEntityRef(self, p, ch);
 298              }
 299              
 300 krisbash 1.4 /* Matches all but '\0', '\'', '"', and '&'. All matching charcters
 301               * yeild 2, except for '\n', which yields 1 
 302               */
 303              static const unsigned char _ReduceAttrValueMatchChars[256] =
 304              {
 305                  0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 306                  1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 307                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 308                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 309                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 310                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 311                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 312                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 313              };
 314              
 315              INLINE int _ReduceAttrValueMatch(XML_Char c)
 316              {
 317              #if defined(CONFIG_ENABLE_WCHAR)
 318                  if (c >= 0 && c < 256)
 319                      return _ReduceAttrValueMatchChars[(unsigned char)c];
 320                  else
 321 krisbash 1.4         return 1;
 322              #else
 323                  return _ReduceAttrValueMatchChars[(unsigned char)c];
 324              #endif
 325              }
 326              
 327 mike     1.1 /* Reduce entity references and remove leading and trailing whitespace */
 328 krisbash 1.4 static XML_Char* _ReduceAttrValue(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut, XML_Char eos)
 329 mike     1.1 {
 330 krisbash 1.4     _Null_terminated_ XML_Char* p = *pInOut;
 331                  _Null_terminated_ XML_Char* end;
 332 mike     1.1     size_t n = 0;
 333              
 334 krisbash 1.4     if (!p)
 335                      return NULL;
 336              
 337                  while (*p)
 338 mike     1.1     {
 339 krisbash 1.4         if (_ReduceAttrValueMatch(*p))
 340                      {
 341 mike     1.1             p++;
 342 krisbash 1.4         }
 343                      else
 344                      {
 345                          if (*p == '\0' || *p != '\n')
 346                              break;
 347 mike     1.1 
 348 krisbash 1.4             self->line++;
 349                          p++;
 350                      }
 351 mike     1.1     }
 352              
 353                  end = p;
 354              
 355                  while (*p && *p != eos)
 356                  {
 357                      if (*p == '&')
 358                      {
 359 krisbash 1.4             XML_Char c = '\0';
 360                          _Null_terminated_ XML_Char* tmp;
 361 mike     1.1             
 362                          p++;
 363                          tmp = _ToRef(self, p, &c);
 364              
 365                          if (self->status)
 366                          {
 367                              /* Propagate error */
 368                              return NULL;
 369                          }
 370              
 371                          *end++ = c;
 372                          p = tmp;
 373                      }
 374                      else
 375                      {
 376                          if (*p == '\n')
 377                              n++;
 378              
 379                          *end++ = *p++;
 380                      }
 381                  }
 382 mike     1.1 
 383                  *pInOut = p;
 384                  self->line += n;
 385              
 386                  return end;
 387              }
 388              
 389 krisbash 1.4 /* Match all but these: '\0', '<', '&', '\n' */
 390              static const unsigned char _ReduceCharDataMatchChars[256] =
 391              {
 392                  0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 393                  1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
 394                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 395                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 396                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 397                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 398                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 399                  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 400              };
 401              
 402              INLINE int _ReduceCharDataMatch(XML_Char c)
 403              {
 404              #if defined(CONFIG_ENABLE_WCHAR)
 405                  if (c >= 0 && c < 256)
 406                      return _ReduceCharDataMatchChars[(unsigned char)c];
 407                  else
 408                      return 1;
 409              #else
 410 krisbash 1.4     return _ReduceCharDataMatchChars[(unsigned char)c];
 411              #endif
 412              }
 413              
 414 mike     1.1 /* Reduce character data, advance p, and return pointer to end */
 415 krisbash 1.4 static XML_Char* _ReduceCharData(_Inout_ XML* self, _Inout_ XMLCharPtr* pInOut)
 416 mike     1.1 {
 417 krisbash 1.4     XML_Char* p = *pInOut;
 418                  XML_Char* end;
 419 mike     1.1     size_t n = 0;
 420              
 421 krisbash 1.4     if (!p)
 422                      return NULL;
 423              
 424                  while (*p)
 425 mike     1.1     {
 426 krisbash 1.4         if (_ReduceCharDataMatch(*p))
 427                      {
 428 mike     1.1             p++;
 429 krisbash 1.4         }
 430                      else
 431                      {
 432                          if (!*p || *p != '\n')
 433                              break;
 434 mike     1.1 
 435 krisbash 1.4             self->line++;
 436                          p++;
 437                      }
 438 mike     1.1     }
 439              
 440                  end = p;
 441              
 442                  /* Can we return now? */
 443                  if (*p == '<')
 444                  {
 445                      *pInOut = p;
 446                      self->line += n;
 447                      return end;
 448                  }
 449              
 450                  /* Seek next tag start */
 451 krisbash 1.4 #ifdef _PREFAST_
 452              #pragma prefast (push)
 453              #pragma prefast (disable: 26018)
 454                  /* OACR does not like loops inside loops that modify buffer pointers, the logic however is correct */
 455              #endif
 456 mike     1.1     while (*p && *p != '<')
 457 krisbash 1.4 #ifdef _PREFAST_
 458              #pragma prefast (pop)
 459              #endif
 460 mike     1.1     {
 461                      if (*p == '&')
 462                      {
 463 krisbash 1.4             XML_Char c = '\0';
 464                          XML_Char* tmp;
 465 mike     1.1             
 466                          p++;
 467                          tmp = _ToRef(self, p, &c);
 468              
 469                          if (tmp == p)
 470                              return NULL;
 471              
 472                          *end++ = c;
 473                          p = tmp;
 474                      }
 475                      else
 476                      {
 477                          for (;;)
 478                          {
 479 krisbash 1.4                 while ((_ReduceCharDataMatch(*p)))
 480 mike     1.1                     *end++ = *p++;
 481              
 482                              if (*p != '\n')
 483                                  break;
 484              
 485                              *end++ = *p++;
 486                              self->line++;
 487                          }
 488                      }
 489                  }
 490              
 491                  /* Document cannot end with character data */
 492                  if (*p == '\0')
 493                      return NULL;
 494              
 495                  *pInOut = p;
 496                  self->line += n;
 497              
 498                  return end;
 499              }
 500              
 501 krisbash 1.4 /* Calculate a fast hash code for a strings */
 502              INLINE unsigned int _HashCode(_In_reads_z_(n) const XML_Char* s, size_t n)
 503 mike     1.1 {
 504                  /* This hash algorithm excludes the first character since for many strings 
 505                   * (e.g., URIs) the first character is not unique. Instead the hash 
 506                   * comprises three components:
 507                   *     (1) The length
 508                   *     (3) The last chacter
 509                   */
 510 krisbash 1.4     return n ? (int)(n ^ s[n-1]) : 0;
 511 mike     1.1 }
 512              
 513              /* Map a URI to a single character namespace identifier */
 514 krisbash 1.4 static XML_Char _FindNamespaceID(
 515                  _Inout_ XML* self, 
 516                  _In_reads_z_(uriSize) const XML_Char* uri,
 517 mike     1.1     size_t uriSize)
 518              {
 519                  size_t i;
 520                  unsigned int code = _HashCode(uri, uriSize);
 521              
 522                  /* Resolve from client namespace registrations */
 523                  for (i = 0; i < self->registeredNameSpacesSize; i++)
 524                  {
 525                      XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i];
 526              
 527 krisbash 1.4         if (rns->uriCode == code && XML_strcmp(rns->uri, uri) == 0)
 528 mike     1.1             return rns->id;
 529                  }
 530              
 531                  /* Not found so return null id */
 532                  return '\0';
 533              }
 534              
 535 krisbash 1.4 #define XML_NS "http://www.w3.org/XML/1998/namespace"
 536              #define T_XML_NS T("http://www.w3.org/XML/1998/namespace")
 537              #define XML_NS_LEN (sizeof(XML_NS) - 1)
 538              
 539              #define XMLNS_NS "http://www.w3.org/2000/xmlns/"
 540              #define T_XMLNS_NS T("http://www.w3.org/2000/xmlns/")
 541              #define XMLNS_NS_LEN (sizeof(XMLNS_NS) - 1)
 542              
 543              /* Map a prefix to its XML namespace
 544               * A non-empty prefix that is unmapped results in an error */
 545              static const XML_NameSpace* _FindNamespace(_Inout_ XML* self, _In_z_ const XML_Char* prefix)
 546              {
 547                  static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
 548                  static const XML_NameSpace s_xml = {T("xml"), 3 ^ 'l', T_XML_NS, XML_NS_LEN, '\0', 0};
 549                  static const XML_NameSpace s_xmlns = {T("xmlns"), 5 ^ 's', T_XMLNS_NS, XMLNS_NS_LEN, '\0', 0};
 550                  unsigned int code = _HashCode(prefix, XML_strlen(prefix));
 551 mike     1.1     size_t i;
 552              
 553 krisbash 1.4     /* Special case: the 'xml' and 'xmlns' namespaces are fixed. */
 554                  if (prefix[0] == 'x' &&
 555                      prefix[1] == 'm' &&
 556                      prefix[2] == 'l')
 557                  {
 558                      if (prefix[3] == '\0')
 559                          return &s_xml;
 560                      else if (prefix[3] == 'n' &&
 561                               prefix[4] == 's' &&
 562                               prefix[5] == '\0')
 563                          return &s_xmlns;
 564                  }
 565 mike     1.1 
 566                  /* First check single entry cache */
 567                  if (self->nameSpacesCacheIndex != (size_t)-1)
 568                  {
 569 krisbash 1.4         XML_NameSpace* ns;
 570                      _Analysis_assume_(self->nameSpacesCacheIndex < XML_MAX_NAMESPACES);
 571                      ns = &self->nameSpaces[self->nameSpacesCacheIndex];
 572 mike     1.1 
 573 krisbash 1.4         if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
 574 mike     1.1         {
 575 krisbash 1.4             return ns;
 576 mike     1.1         }
 577                  }
 578              
 579                  /* Translate name to the one found in the nameSpaces[] array */
 580                  for (i = self->nameSpacesSize; i--; )
 581                  {
 582 krisbash 1.4         const XML_NameSpace* ns = &self->nameSpaces[i];
 583 mike     1.1 
 584 krisbash 1.4         if (ns && ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
 585 mike     1.1         {
 586                          /* Cache */
 587                          self->nameSpacesCacheIndex = i;
 588              
 589 krisbash 1.4             return ns;
 590 mike     1.1         }
 591                  }
 592              
 593 krisbash 1.4     /* No match
 594                   * For the empty prefix this means there is no namespace
 595                   * Otherwise, this is an error */
 596                  if (prefix[0])
 597                  {
 598                      XML_Raise(self, XML_ERROR_UNDEFINED_NAMESPACE_PREFIX, tcs(prefix));
 599                      return NULL;
 600                  }
 601              
 602                  return &s_empty;
 603 mike     1.1 }
 604              
 605              static void _ParseAttr(
 606 krisbash 1.4     _Inout_ XML* self, 
 607                  _Inout_ XML_Elem* elem, 
 608                  _Inout_ XMLCharPtr* pInOut)
 609              {
 610                  _Null_terminated_ XML_Char* p = *pInOut;
 611                  XML_Char* name;
 612                  XML_Char* nameEnd;
 613                  XML_Char* value;
 614                  XML_Char* valueEnd;
 615                  XML_Char* colon = NULL;
 616                  XML_Char* tag = p;
 617                  XML_Char* prefix = T("");
 618              
 619                  if (!p)
 620                  {
 621                      XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME);
 622                      return;
 623                  }
 624 mike     1.1 
 625                  /* Parse the attribute name */
 626                  {
 627                      name = p;
 628              
 629 krisbash 1.4         if (!*p || !_IsFirst(*p))
 630 mike     1.1         {
 631 krisbash 1.4             XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_NAME);
 632 mike     1.1             return;
 633                      }
 634              
 635                      p++;
 636              
 637 krisbash 1.4         if (*p)
 638                          p = _SkipInner(p);
 639 mike     1.1 
 640                      if (*p == ':')
 641                      {
 642                          colon = p++;
 643                          p = _SkipInner(p);
 644                      }
 645              
 646                      nameEnd = p;
 647                  }
 648              
 649                  /* Seek the quote character (position p beyond quote) */
 650                  {
 651                      /* Skip spaces */
 652                      p = _SkipSpaces(self, p);
 653              
 654                      /* Expect a '=' character */
 655                      if (*p++ != '=')
 656                      {
 657 krisbash 1.4             *nameEnd = '\0';
 658                          XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_EQUALS, tcs(name));
 659 mike     1.1             return;
 660                      }
 661                  }
 662              
 663                  /* Null-terminate name now that we are beyond the '=' */
 664                  *nameEnd = '\0';
 665              
 666                  /* Skip spaces */
 667                  p = _SkipSpaces(self, p);
 668              
 669                  /* Parse the value */
 670                  {
 671 krisbash 1.4         XML_Char quote;
 672 mike     1.1 
 673                      /* Expect opening quote */
 674                      if (*p != '"' && *p != '\'')
 675                      {
 676 krisbash 1.4             XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_OPENING_QUOTES, tcs(name));
 677 mike     1.1             return;
 678                      }
 679              
 680                      quote = *p++;
 681                      value = p;
 682              
 683                      valueEnd = _ReduceAttrValue(self, &p, quote);
 684              
 685                      if (self->status)
 686                      {
 687                          /* Propagate error */
 688                          return;
 689                      }
 690              
 691                      /* Expect closing quote */
 692                      if (*p++ != quote)
 693                      {
 694 krisbash 1.4             XML_Raise(self, XML_ERROR_EXPECTED_ATTRIBUTE_CLOSING_QUOTES, tcs(name));
 695 mike     1.1             return;
 696                      }
 697              
 698                      /* Null-terminate the value */
 699                      *valueEnd = '\0';
 700                  }
 701              
 702                  /* Skip spaces */
 703                  p = _SkipSpaces(self, p);
 704              
 705 krisbash 1.4     /* Separate the prefix and tag */
 706                  if (colon)
 707                  {
 708                      *colon = '\0';
 709                      tag = colon + 1;
 710                      prefix = name;
 711                  }
 712                  /* else prefix is "" and name is the tag */
 713              
 714                  /* If the first/only token is "xmlns", extract namespace */
 715 mike     1.1     if (name[0] == 'x' &&
 716                      name[1] == 'm' &&
 717                      name[2] == 'l' &&
 718                      name[3] == 'n' &&
 719 krisbash 1.4         name[4] == 's' &&
 720                      name[5] == '\0')
 721 mike     1.1     {
 722 krisbash 1.4         unsigned int tagCode = 0;
 723              
 724                      /* The namespace of the xmlns:x attribute is fixed */
 725                      prefix = T("xmlns");
 726              
 727                      if (colon)
 728 mike     1.1         {
 729 krisbash 1.4             /* For non-default namespaces */
 730                          tagCode = _HashCode(tag, nameEnd - tag);
 731 mike     1.1         }
 732              
 733                      /* Add new namespace entry */
 734                      {
 735                          /* Check for stack overflow */
 736                          if (self->nameSpacesSize == XML_MAX_NAMESPACES)
 737                          {
 738 krisbash 1.4                 XML_Raise(self, XML_ERROR_TOO_MANY_NAMESPACES,
 739 mike     1.1                     (int)XML_MAX_NAMESPACES);
 740                              return;
 741                          }
 742                          {
 743 krisbash 1.4                 XML_NameSpace* newNs = &self->nameSpaces[self->nameSpacesSize++];
 744                              newNs->name = colon ? tag : T("");
 745                              newNs->nameCode = tagCode;
 746                              newNs->id = _FindNamespaceID(self, value, valueEnd - value);
 747                              newNs->uri = value;
 748                              newNs->uriSize = valueEnd - value;
 749                              newNs->depth = self->stackSize;
 750 mike     1.1             }
 751                      }
 752                  }
 753              
 754                  /* Append attribute to element */
 755                  {
 756                      XML_Attr* attr;
 757              
 758                      /* Check for attribute array overflow */
 759                      if (elem->attrsSize == XML_MAX_ATTRIBUTES)
 760                      {
 761 krisbash 1.4             elem->data.data[elem->data.size] = 0;   //May not have been null termated yet
 762                          XML_Raise(self, XML_ERROR_TOO_MANY_ATTRIBUTES, tcs(elem->data.data), (int)XML_MAX_ATTRIBUTES);
 763 mike     1.1             return;
 764                      }
 765              
 766                      attr = &elem->attrs[elem->attrsSize++];
 767 krisbash 1.4         attr->name.data = tag;
 768                      attr->name.size = nameEnd - tag;
 769 mike     1.1         attr->value = value;
 770 krisbash 1.4         attr->valueSize = valueEnd - value;
 771              
 772                      /* Save the namespace prefix, which will be translated by the caller */
 773                      attr->name.namespaceUri = prefix;
 774 mike     1.1     }
 775              
 776                  *pInOut = p;
 777              }
 778              
 779              static void _ParseProcessingInstruction(
 780 krisbash 1.4     _Inout_ XML* self, 
 781                  _Inout_ XML_Elem* elem, 
 782                  _In_z_ XML_Char* p)
 783 mike     1.1 {
 784                  /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */
 785 krisbash 1.4     XML_Char* start;
 786                  XML_Char* end;
 787 mike     1.1 
 788                  /* Advance past '?' character */
 789 krisbash 1.4     if (*p)
 790                  {
 791                      p++;
 792                  }
 793 mike     1.1 
 794                  /* Get tag identifier */
 795                  {
 796                      start = p;
 797              
 798 krisbash 1.4         if (*p)
 799                      {
 800                          p = _SkipInner(p);
 801                      }
 802 mike     1.1 
 803                      if (*p == ':')
 804                      {
 805                          p++;
 806 krisbash 1.4             if (*p)
 807                          {
 808                              p = _SkipInner(p);
 809                          }
 810 mike     1.1         }
 811              
 812 krisbash 1.4         /* If input exhausted */
 813 mike     1.1         if (*p == '\0')
 814                      {
 815 krisbash 1.4             XML_Raise(self, XML_ERROR_END_OF_XML_INSTRUCTION);
 816 mike     1.1             return;
 817                      }
 818              
 819                      end = p;
 820                  }
 821              
 822                  /* Skip spaces */
 823 krisbash 1.4     if (*p)
 824                  {
 825                      p = _SkipSpaces(self, p);
 826                  }
 827              
 828                  elem->type = XML_INSTRUCTION;
 829                  elem->data.data = start;
 830                  elem->data.size = end - start;
 831 mike     1.1 
 832                  /* Process attributes */
 833              
 834                  while (*p && *p != '?')
 835                  {
 836                      _ParseAttr(self, elem, &p);
 837              
 838                      if (self->status)
 839                      {
 840                          /* Propagate error */
 841                          return;
 842                      }
 843                  }
 844              
 845 krisbash 1.4     if (*p)
 846                  {
 847                      p++;
 848                  }
 849 mike     1.1 
 850                  /* Skip spaces */
 851 krisbash 1.4     if (*p)
 852                  {
 853                      p = _SkipSpaces(self, p);
 854                  }
 855 mike     1.1 
 856                  /* Expect '>' */
 857                  if (*p++ != '>')
 858                  {
 859 krisbash 1.4         XML_Raise(self, XML_ERROR_END_OF_INSTRUCTION_MISSING);
 860 mike     1.1         return;
 861                  }
 862              
 863                  /* Return element object */
 864 krisbash 1.4     elem->data.namespaceUri = T("");
 865                  elem->data.namespaceUriSize = 0;
 866                  elem->data.namespaceId = '\0';
 867 mike     1.1     *end = '\0';
 868              
 869                  self->ptr = p;
 870              
 871                  if (self->foundRoot)
 872                      self->state = STATE_CHARS;
 873                  else
 874                      self->state = STATE_START;
 875              }
 876              
 877              static void _ParseStartTag(
 878 krisbash 1.4     _Inout_ XML* self, 
 879                  _Inout_ XML_Elem* elem, 
 880                  _In_z_ XML_Char* p)
 881              {
 882                  XML_Char* name;
 883                  XML_Char* nameEnd;
 884                  XML_Char* colon = NULL;
 885                  XML_Char* prefix = T("");
 886                  const XML_NameSpace* ns;
 887                  size_t attr;
 888 mike     1.1 
 889                  /* Found the root */
 890                  self->foundRoot = 1;
 891              
 892                  /* Get tag identifier */
 893                  {
 894                      name = p;
 895              
 896 krisbash 1.4         if (!*p || !_IsFirst(*p++))
 897 mike     1.1         {
 898 krisbash 1.4             XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED);
 899 mike     1.1             return;
 900                      }
 901              
 902 krisbash 1.4         if (*p)
 903                      {
 904                          p = _SkipInner(p);
 905                      }
 906 mike     1.1 
 907                      if (*p == ':')
 908                      {
 909                          colon = p++;
 910 krisbash 1.4             if (*p)
 911                          {
 912                              p = _SkipInner(p);
 913                          }
 914 mike     1.1         }
 915              
 916                      /* If input exhuasted */
 917                      if (*p == '\0')
 918                      {
 919 krisbash 1.4             XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END);
 920 mike     1.1             return;
 921                      }
 922              
 923                      nameEnd = p;
 924                  }
 925              
 926 krisbash 1.4     if (colon)
 927                  {
 928                      *colon = '\0';
 929                      prefix = name;
 930                      name = colon + 1;
 931                  }
 932              
 933 mike     1.1     /* Skip spaces */
 934                  p = _SkipSpaces(self, p);
 935              
 936 krisbash 1.4     elem->type = XML_START;
 937                  elem->data.data = name;
 938                  elem->data.size = nameEnd - name;
 939              
 940 mike     1.1     /* Process attributes */
 941                  while (*p && *p != '/' && *p != '>')
 942                  {
 943                      _ParseAttr(self, elem, &p);
 944              
 945                      if (self->status)
 946                          return;
 947                  }
 948              
 949 krisbash 1.4     /* Translate the namespace after parsing xmlns attributes */
 950                  ns = _FindNamespace(self, prefix);
 951              
 952                  if (self->status)
 953                      return;
 954              
 955                  /* Now translate the attribute namespaces */
 956                  /* Unprefixed attributes get a empty namespace */
 957                  for (attr = 0; attr < elem->attrsSize; attr++)
 958                  {
 959                      static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
 960                      XML_Attr* item = elem->attrs + attr;
 961                      const XML_NameSpace* itemNS = &s_empty;
 962              
 963                      if (item->name.namespaceUri[0] != '\0')
 964                      {
 965                          /* The namespaceUri field contains the prefix */
 966                          itemNS = _FindNamespace(self, item->name.namespaceUri);
 967              
 968                          if (self->status)
 969                              return;
 970 krisbash 1.4         }
 971              
 972                      item->name.namespaceUri = itemNS->uri;
 973                      item->name.namespaceUriSize = itemNS->uriSize;
 974                      item->name.namespaceId = itemNS->id;
 975                  }
 976              
 977                  /* Create the element */
 978                  elem->type = XML_START;
 979                  elem->data.data = name;
 980                  elem->data.size = nameEnd - name;
 981                  elem->data.namespaceUri = ns->uri;
 982                  elem->data.namespaceUriSize = ns->uriSize;
 983                  elem->data.namespaceId = ns->id;
 984              
 985 mike     1.1     /* Check for empty tag */
 986                  if (*p == '/')
 987                  {
 988                      p++;
 989              
 990                      /* Null-terminate the tag */
 991                      *nameEnd = '\0';
 992              
 993                      /* Inject an empty tag onto element stack */
 994                      {
 995                          /* Check for stack overflow */
 996                          if (self->elemStackSize == XML_MAX_NESTED)
 997                          {
 998 krisbash 1.4                 XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, 
 999                                  (int)XML_MAX_NESTED);
1000 mike     1.1                 return;
1001                          }
1002              
1003                          self->elemStack[self->elemStackSize] = *elem;
1004                          self->elemStack[self->elemStackSize].type = XML_END;
1005                          self->elemStackSize++;
1006                          self->nesting++;
1007                      }
1008              
1009                      /* Skip space */
1010 krisbash 1.4         if (*p)
1011                      {
1012                          p = _SkipSpaces(self, p);
1013                      }
1014 mike     1.1 
1015                      /* Expect '>' */
1016                      if (*p++ != '>')
1017                      {
1018 krisbash 1.4             *nameEnd = '\0';
1019                          XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data));
1020 mike     1.1             return;
1021                      }
1022              
1023                      self->ptr = p;
1024                      self->state = STATE_CHARS;
1025                      return;
1026                  }
1027              
1028                  /* Expect '>' */
1029                  if (*p++ != '>')
1030                  {
1031 krisbash 1.4         *nameEnd = '\0';
1032                      XML_Raise(self, XML_ERROR_ELEMENT_NAME_NOT_CLOSED, tcs(elem->data.data));
1033 mike     1.1         return;
1034                  }
1035              
1036                  /* Zero-terminate the name tag */
1037                  *nameEnd = '\0';
1038              
1039                  /* Push opening tag */
1040                  {
1041                      if (self->stackSize == XML_MAX_NESTED)
1042                      {
1043 krisbash 1.4             XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, 
1044                              (int)XML_MAX_NESTED);
1045 mike     1.1             return;
1046                      }
1047              
1048 krisbash 1.4         self->stack[self->stackSize] = elem->data;
1049 mike     1.1         self->stackSize++;
1050                      self->nesting++;
1051                  }
1052              
1053                  self->ptr = p;
1054              
1055                  if (self->foundRoot)
1056                      self->state = STATE_CHARS;
1057                  else
1058                      self->state = STATE_START;
1059              }
1060              
1061              static void _ParseEndTag(
1062 krisbash 1.4     _Inout_ XML* self, 
1063                  _Inout_ XML_Elem* elem, 
1064                  _In_z_ XML_Char* p)
1065 mike     1.1 {
1066                  /* Closing element: </name> */
1067 krisbash 1.4     XML_Char* name;
1068                  XML_Char* nameEnd;
1069                  XML_Char* colon = NULL;
1070                  XML_Char* prefix = T("");
1071                  const XML_NameSpace *ns;
1072 mike     1.1 
1073 krisbash 1.4     if (*p)
1074                  {
1075                      p++;
1076                  }
1077 mike     1.1 
1078                  /* Skip space */
1079 krisbash 1.4     if (*p)
1080                  {
1081                      p = _SkipSpaces(self, p);
1082                  }
1083 mike     1.1 
1084                  name = p;
1085              
1086                  /* Skip name */
1087                  {
1088 krisbash 1.4         if (!*p || !_IsFirst(*p++))
1089 mike     1.1         {
1090 krisbash 1.4             XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED_ELEM_END);
1091 mike     1.1             return;
1092                      }
1093              
1094 krisbash 1.4         if (*p)
1095                      {
1096                          p = _SkipInner(p);
1097                      }
1098 mike     1.1 
1099                      if (*p == ':')
1100                      {
1101                          colon = p++;
1102 krisbash 1.4             if (*p)
1103                          {
1104                              p = _SkipInner(p);
1105                          }
1106 mike     1.1         }
1107                  }
1108              
1109                  /* If input exhuasted */
1110                  if (*p == '\0')
1111                  {
1112 krisbash 1.4         XML_Raise(self, XML_ERROR_ELEMENT_NAME_PREMATURE_END_ELEM_END);
1113 mike     1.1         return;
1114                  }
1115              
1116                  nameEnd = p;
1117              
1118 krisbash 1.4     if (colon)
1119                  {
1120                      *colon = '\0';
1121                      prefix = name;
1122                      name = colon + 1;
1123                  }
1124              
1125 mike     1.1     /* Skip spaces */
1126                  p = _SkipSpaces(self, p);
1127              
1128                  /* Expect '>' */
1129                  if (*p++ != '>')
1130                  {
1131 krisbash 1.4         XML_Raise(self,XML_ERROR_ELEMENT_NAME_NOT_CLOSED_ELEM_END, tcs(name));
1132 mike     1.1         return;
1133                  }
1134              
1135                  /* Null terminate name */
1136                  *nameEnd = '\0';
1137              
1138 krisbash 1.4     ns = _FindNamespace(self, prefix);
1139              
1140                  if (self->status)
1141                      return;
1142 mike     1.1 
1143                  /* Return element object */
1144                  elem->type = XML_END;
1145 krisbash 1.4     elem->data.data = name;
1146                  elem->data.size = nameEnd - name;
1147                  elem->data.namespaceUri = ns->uri;
1148                  elem->data.namespaceUriSize = ns->uriSize;
1149                  elem->data.namespaceId = ns->id;
1150 mike     1.1 
1151                  /* Match opening name */
1152                  {
1153                      /* Check for stack underflow */
1154                      if (self->stackSize-- == 0)
1155                      {
1156 krisbash 1.4             XML_Raise(self, XML_ERROR_ELEMENT_TOO_MANY_ENDS, tcs(name));
1157 mike     1.1             return;
1158                      }
1159              
1160                      self->nesting--;
1161              
1162                      /* Check that closing name matches opening name */
1163                      {
1164                          XML_Name* xn = &self->stack[self->stackSize];
1165              
1166 krisbash 1.4             if (XML_strcmp(xn->data, name) != 0 ||
1167                              xn->namespaceId != ns->id ||
1168                              (ns->id == 0 && XML_strcmp(xn->namespaceUri, ns->uri) != 0))
1169 mike     1.1             {
1170 krisbash 1.4                 XML_Raise(self, XML_ERROR_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG, 
1171                                  tcs(self->stack[self->stackSize].data), tcs(name));
1172 mike     1.1                 return;
1173                          }
1174                      }
1175                  }
1176              
1177                  /* Remove namespaces that have just gone out of scope */
1178                  {
1179                      size_t i;
1180                      size_t n = 0;
1181              
1182                      for (i = self->nameSpacesSize; i--; )
1183                      {
1184                          if (self->nameSpaces[i].depth >= self->stackSize)
1185                              n++;
1186                      }
1187              
1188                      if (n)
1189                      {
1190                          self->nameSpacesSize -= n;
1191              
1192                          /* Clear single-entry cache */
1193 mike     1.1             if (self->nameSpacesCacheIndex >= self->nameSpacesSize)
1194                              self->nameSpacesCacheIndex = (size_t)-1;
1195                      }
1196                  }
1197              
1198                  /* Set next state */
1199                  self->ptr = p;
1200                  self->state = STATE_CHARS;
1201              }
1202              
1203              static void _ParseComment(
1204 krisbash 1.4     _Inout_ XML* self, 
1205                  _Inout_ XML_Elem* elem, 
1206                  _In_z_ XML_Char* p)
1207 mike     1.1 {
1208                  /* Comment: <!-- blah blah blah --> */
1209 krisbash 1.4     XML_Char* start;
1210                  XML_Char* end;
1211 mike     1.1 
1212 krisbash 1.4     p = _SkipChars(p, 2);
1213                  if (!*p)
1214                  {
1215                      XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END);
1216                      return;
1217                  }
1218 mike     1.1     start = p;
1219              
1220                  while (*p)
1221                  {
1222                      if (p[0] == '-' && p[1] == '-')
1223                      {
1224                          if (p[2] != '>')
1225                          {
1226 krisbash 1.4                 XML_Raise(self, XML_ERROR_COMMENT_END_EXPECTED);
1227 mike     1.1                 return;
1228                          }
1229              
1230                          /* Null-terminate this comment */
1231                          end = p;
1232                          p += 3;
1233              
1234                          /* Prepare element */
1235                          elem->type = XML_COMMENT;
1236 krisbash 1.4             elem->data.data = start;
1237                          elem->data.size = end - start;
1238                          elem->data.namespaceUri = T("");
1239                          elem->data.namespaceUriSize = 0;
1240                          elem->data.namespaceId = '\0';
1241 mike     1.1             *end = '\0';
1242              
1243                          /* Set next state */
1244                          self->ptr = p;
1245              
1246                          if (self->foundRoot)
1247                              self->state = STATE_CHARS;
1248                          else
1249                              self->state = STATE_START;
1250              
1251                          return;
1252                      }
1253                      else if (p[0] == '\n')
1254                          self->line++;
1255              
1256                      p++;
1257                  }
1258              
1259 krisbash 1.4     XML_Raise(self, XML_ERROR_COMMENT_PREMATURE_END);
1260 mike     1.1 }
1261              
1262              static void _ParseCDATA(
1263 krisbash 1.4     _Inout_ XML* self, 
1264                  _Inout_ XML_Elem* elem, 
1265                  _In_z_ XML_Char* p)
1266 mike     1.1 {
1267                  /* <![CDATA[...]]> */
1268 krisbash 1.4     XML_Char* start;
1269                  XML_Char* end;
1270 mike     1.1 
1271 krisbash 1.4     p = _SkipChars(p, 7);
1272                  if (!*p)
1273                  {
1274                      XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END);
1275                      return;
1276                  }
1277 mike     1.1     start = p;
1278              
1279                  while (*p)
1280                  {
1281                      if (p[0] == ']' && p[1] == ']' && p[2] == '>')
1282                      {
1283                          end = p;
1284                          p += 3;
1285              
1286                          /* Prepare element */
1287                          elem->type = XML_CHARS;
1288 krisbash 1.4             elem->data.data = start;
1289                          elem->data.size = end - start;
1290                          elem->data.namespaceUri = T("");
1291                          elem->data.namespaceUriSize = 0;
1292                          elem->data.namespaceId = '\0';
1293 mike     1.1             *end = '\0';
1294              
1295                          /* Set next state */
1296                          self->ptr = p;
1297                          self->state = STATE_CHARS;
1298              
1299                          return;
1300              
1301                      }
1302                      else if (p[0] == '\n')
1303                          self->line++;
1304              
1305                      p++;
1306                  }
1307              
1308 krisbash 1.4     XML_Raise(self, XML_ERROR_CDATA_PREMATURE_END);
1309 mike     1.1     return;
1310              }
1311              
1312              static void _ParseDOCTYPE(
1313 krisbash 1.4     _Inout_ XML* self, 
1314                  _Inout_ XML_Elem* elem, 
1315                  _In_z_ XML_Char* p)
1316 mike     1.1 {
1317                  MI_UNUSED(elem);
1318              
1319                  /* Recognize <!DOCTYPE ...> */
1320 krisbash 1.4     p = _SkipChars(p, 7);
1321                  if (!*p)
1322                  {
1323                      XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END);
1324                      return;
1325                  }
1326 mike     1.1 
1327                  while (*p && *p != '>')
1328                  {
1329                      if (p[0] == '\n')
1330                          self->line++;
1331              
1332                      p++;
1333                  }
1334              
1335                  if (*p++ != '>')
1336                  {
1337 krisbash 1.4         XML_Raise(self, XML_ERROR_DOCTYPE_PREMATURE_END);
1338 mike     1.1         return;
1339                  }
1340              
1341                  /* Set next state */
1342                  self->ptr = p;
1343              
1344                  if (self->foundRoot)
1345                      self->state = STATE_CHARS;
1346                  else
1347                      self->state = STATE_START;
1348              }
1349              
1350              static int _ParseCharData(
1351 krisbash 1.4     _Inout_ XML* self, 
1352                  _Inout_ XML_Elem* elem, 
1353                  _In_z_ XML_Char* p)
1354 mike     1.1 {
1355 krisbash 1.4     XML_Char* start;
1356                  XML_Char* end;
1357 mike     1.1 
1358                  /* Reject input if it does appear inside tags */
1359                  if (self->stackSize == 0)
1360                  {
1361 krisbash 1.4         /* Finished parsing document */
1362                      self->status = 1;
1363                      self->ptr = p;
1364 mike     1.1         return 0;
1365                  }
1366              
1367                  if (*p == '<')
1368                  {
1369                      self->ptr = p + 1;
1370                      self->state = STATE_TAG;
1371                      return 0;
1372                  }
1373              
1374                  /* Save pointer to start of data */
1375                  start = p;
1376              
1377                  /* reduce character data */
1378                  end = _ReduceCharData(self, &p);
1379              
1380                  if (self->status)
1381                  {
1382                      /* Propagate error */
1383                      return 0;
1384                  }
1385 mike     1.1 
1386                  /* Process character data */
1387                  if (*p != '<')
1388                  {
1389 krisbash 1.4         XML_Raise(self, XML_ERROR_CHARDATA_EXPECTED_ELEMENT_END_TAG);
1390 mike     1.1         return 0;
1391                  }
1392              
1393                  /* Set next state */
1394                  self->ptr = p + 1;
1395                  self->state = STATE_TAG;
1396              
1397                  /* Return character data element if non-empty */
1398                  if (end == start)
1399                      return 0;
1400              
1401                  /* Prepare element */
1402                  *end = '\0';
1403                  elem->type = XML_CHARS;
1404 krisbash 1.4     elem->data.data = start;
1405                  elem->data.size = end - start;
1406                  elem->data.namespaceUri = T("");
1407                  elem->data.namespaceUriSize = 0;
1408                  elem->data.namespaceId = '\0';
1409 mike     1.1 
1410                  /* Return 1 to indicate non-empty element */
1411                  return 1;
1412              }
1413              
1414              /*
1415              **==============================================================================
1416              **
1417              ** Public definitions
1418              **
1419              **==============================================================================
1420              */
1421              
1422 krisbash 1.4 const XML_Char* XML_Elem_GetAttr(
1423                  _Inout_ XML_Elem* self,
1424                  XML_Char nsId,
1425                  _In_z_ const XML_Char* name)
1426 mike     1.1 {
1427                  size_t i;
1428              
1429                  for (i = 0; i < self->attrsSize; i++)
1430                  {
1431 krisbash 1.4         if (nsId == self->attrs[i].name.namespaceId &&
1432                          XML_strcmp(name, self->attrs[i].name.data) == 0)
1433 mike     1.1             return self->attrs[i].value;
1434                  }
1435              
1436                  /* Not found! */
1437                  return NULL;
1438              }
1439              
1440              void XML_Init(
1441 krisbash 1.4     _Out_ XML* self)
1442 mike     1.1 {
1443                  memset(self, 0, sizeof(XML));
1444              
1445                  self->nameSpacesCacheIndex = (size_t)-1;
1446              }
1447              
1448              void XML_SetText(
1449 krisbash 1.4     _Inout_ XML* self,
1450                  _In_z_ XML_Char* text)
1451 mike     1.1 {
1452                  self->text = text;
1453                  self->ptr = text;
1454                  self->line = 1;
1455                  self->state = STATE_START;
1456              }
1457              
1458              int XML_Next(
1459 krisbash 1.4     _Inout_ XML* self,
1460                  _Out_ XML_Elem* elem)
1461 mike     1.1 {
1462                  if (self->elemStackSize)
1463                  {
1464                      *elem = self->elemStack[--self->elemStackSize];
1465                      self->nesting--;
1466                      return 0;
1467                  }
1468              
1469                  elem->attrsSize = 0;
1470              
1471                  for (;;)
1472                  {
1473                      switch (self->state)
1474                      {
1475                          case STATE_START:
1476                          {
1477 krisbash 1.4                 XML_Char* p = self->ptr;
1478 mike     1.1 
1479                              /* Skip spaces */
1480                              p = _SkipSpaces(self, p);
1481              
1482                              /* Expect '<' */
1483                              if (*p != '<')
1484                              {
1485 krisbash 1.4                     XML_Raise(self, XML_ERROR_OPEN_ANGLE_BRACKET_EXPECTED);
1486 mike     1.1                     return -1;
1487                              }
1488              
1489                              self->ptr = p + 1;
1490                              self->state = STATE_TAG;
1491                              break;
1492                          }
1493                          case STATE_TAG:
1494                          {
1495 krisbash 1.4                 XML_Char* p = self->ptr;
1496 mike     1.1 
1497                              /* Skip spaces */
1498                              p = _SkipSpaces(self, p);
1499              
1500                              /* Expect one of these */
1501                              if (*p == '/')
1502                              {
1503                                  _ParseEndTag(self, elem, p);
1504                                  return self->status;
1505                              }
1506                              else if (_IsFirst(*p))
1507                              {
1508                                  _ParseStartTag(self, elem, p);
1509                                  return self->status;
1510                              }
1511                              else if (*p == '?')
1512                              {
1513                                  _ParseProcessingInstruction(self, elem, p);
1514                                  return self->status;
1515                              }
1516                              else if (*p == '!')
1517 mike     1.1                 {
1518                                  p++;
1519              
1520                                  if (p[0] == '-' && p[1] == '-')
1521                                  {
1522                                      _ParseComment(self, elem, p);
1523                                      return self->status;
1524                                  }
1525 krisbash 1.4                     else if (
1526                                      memcmp(p, T("[CDATA["), 7 * sizeof(XML_Char)) == 0)
1527 mike     1.1                     {
1528                                      _ParseCDATA(self, elem, p);
1529                                      return self->status;
1530                                  }
1531 krisbash 1.4                     else if (
1532                                      memcmp(p, T("DOCTYPE"), 7 * sizeof(XML_Char)) == 0)
1533 mike     1.1                     {
1534                                      _ParseDOCTYPE(self, elem, p);
1535              
1536                                      if (self->status)
1537                                          return -1;
1538              
1539                                      break;
1540                                  }
1541                                  else
1542                                  {
1543 krisbash 1.4                         XML_Raise(self, XML_ERROR_COMMENT_CDATA_DOCTYPE_EXPECTED);
1544 mike     1.1                         return -1;
1545                                  }
1546                              }
1547                              else
1548                              {
1549 krisbash 1.4                     XML_Raise(self, XML_ERROR_ELEMENT_EXPECTED);
1550 mike     1.1                     return-1;
1551                              }
1552                              break;
1553                          }
1554                          case STATE_CHARS:
1555                          {
1556 krisbash 1.4                 XML_Char* p = self->ptr;
1557 mike     1.1 
1558                              if (_ParseCharData(self, elem, p) == 1)
1559                              {
1560                                  /* Return character data to caller */
1561                                  return 0;
1562                              }
1563              
1564                              if (self->status)
1565                                  return self->status;
1566              
1567                              /* empty character data */
1568                              break;
1569                          }
1570                          default:
1571                          {
1572 krisbash 1.4                 XML_Raise(self, XML_ERROR_UNEXPECTED_STATE);
1573 mike     1.1                 return -1;
1574                          }
1575                      }
1576                  }
1577              
1578                  //return 0;
1579              }
1580              
1581              int XML_Expect(
1582 krisbash 1.4     _Inout_ XML* self,
1583                  _Out_ XML_Elem* elem,
1584 mike     1.1     XML_Type type,
1585 krisbash 1.4     XML_Char nsId,
1586                  _In_z_ const XML_Char* name)
1587 mike     1.1 {
1588                  if (XML_Next(self, elem) == 0 && 
1589                      elem->type == type && 
1590 krisbash 1.4         nsId == elem->data.namespaceId &&
1591                      (!name || XML_strcmp(elem->data.data, name) == 0))
1592 mike     1.1     {
1593                      return 0;
1594                  }
1595              
1596                  if (type == XML_START)
1597 krisbash 1.4         XML_Raise(self, XML_ERROR_ELEMENT_NAME_EXPECTED, 
1598                          tcs(name), tcs(elem->data.data));
1599 mike     1.1     else if (type == XML_END)
1600 krisbash 1.4         XML_Raise(self, XML_ERROR_SPECIFIC_END_ELEMENT_EXPECTED,
1601                          tcs(name), tcs(elem->data.data));
1602 mike     1.1     else if (type == XML_CHARS)
1603 krisbash 1.4         XML_Raise(self, XML_ERROR_CHARACTER_DATA_EXPECTED);
1604 mike     1.1 
1605                  return -1;
1606              }
1607              
1608              int XML_Skip(
1609 krisbash 1.4     _Inout_ XML* self)
1610 mike     1.1 {
1611                  XML_Elem tmp;
1612                  size_t nesting = self->nesting;
1613              
1614                  while (self->nesting >= nesting)
1615                  {
1616                      if (XML_Next(self, &tmp) != 0)
1617                          return -1;
1618                  }
1619              
1620                  return 0;
1621              }
1622              
1623              int XML_RegisterNameSpace(
1624 krisbash 1.4     _Inout_ XML* self,
1625                  XML_Char id,
1626                  _In_z_ const XML_Char* uri)
1627 mike     1.1 {
1628                  XML_RegisteredNameSpace rns;
1629                  /* ATTN: we do not check for duplicates */
1630              
1631                  /* Reject out of range ids */
1632                  if (id < 'a' || id > 'z')
1633                      return -1;
1634              
1635                  /* Check for overflow of the array */
1636                  if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES)
1637                      return -1;
1638              
1639                  rns.id = id;
1640                  rns.uri = uri;
1641 krisbash 1.4     rns.uriCode = _HashCode(uri, XML_strlen(uri));
1642 mike     1.1 
1643                  self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns;
1644              
1645                  return 0;
1646              }
1647              
1648              int XML_PutBack(
1649 krisbash 1.4     _Inout_ XML* self,
1650                  _In_ const XML_Elem* elem)
1651 mike     1.1 {
1652                  /* Check for stack overflow */
1653                  if (self->elemStackSize == XML_MAX_NESTED)
1654                  {
1655 krisbash 1.4         XML_Raise(self, XML_ERROR_ELEMENT_DEPTH_OVERFLOW, 
1656                          (int)XML_MAX_NESTED);
1657 mike     1.1         return -1;
1658                  }
1659              
1660                  self->elemStack[self->elemStackSize++] = *elem;
1661                  return 0;
1662              }
1663              
1664 krisbash 1.4 #if defined(_MSC_VER)
1665              void XML_Raise(_Inout_ XML* self, unsigned formatStringId, ...)
1666 mike     1.1 {
1667 krisbash 1.4     HMODULE hModule;
1668                  XML_Char formatMsg[MAX_PATH];
1669                  va_list ap;
1670                  memset(&ap, 0, sizeof(ap));
1671              
1672                  self->status = -1;
1673                  self->message[0] = '\0';
1674              
1675                  if (GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (LPCTSTR ) XML_Raise, &hModule) == 0)
1676                  {
1677                      memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1678                  }
1679                  else
1680                  {
1681                      if(LoadStringW(hModule, formatStringId, formatMsg, MAX_PATH))
1682                      {
1683                          va_start(ap, formatStringId);
1684                          if (FormatMessageW(FORMAT_MESSAGE_FROM_STRING, formatMsg, 0, 0, self->message, sizeof(self->message)/sizeof(self->message[0]), &ap) == 0)
1685                          {
1686                              memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1687                          }
1688 krisbash 1.4             va_end(ap);
1689                      }
1690                      else
1691                      {
1692                          memcpy(self->message, T("Failed to parse XML."), sizeof(T("Failed to parse XML.")));
1693                      }
1694                  }
1695 mike     1.1 }
1696 krisbash 1.4 #else
1697 mike     1.1 
1698 krisbash 1.4 void XML_Raise(XML* self, _In_z_ const XML_Char* format, ...)
1699 mike     1.1 {
1700                  va_list ap;
1701              
1702                  self->status = -1;
1703                  self->message[0] = '\0';
1704              
1705 krisbash 1.4     memset(&ap, 0, sizeof(ap));
1706 mike     1.1     va_start(ap, format);
1707 krisbash 1.4     Vstprintf(self->message, MI_COUNT(self->message), format, ap);
1708 mike     1.1     va_end(ap);
1709              }
1710 krisbash 1.4 #endif
1711 mike     1.1 
1712 krisbash 1.4 void XML_FormatError(_Inout_ XML* self, _Out_writes_z_(size) XML_Char* buffer, size_t size)
1713 mike     1.1 {
1714 krisbash 1.4     *buffer = '\0';
1715 mike     1.1 
1716                  if (self->status == -1)
1717 krisbash 1.4     {
1718                      Stprintf(
1719                          buffer, 
1720                          size, 
1721                          PAL_T("%u: error: %T"), 
1722                          (unsigned int)self->line, 
1723                          tcs(self->message));
1724                  }
1725              }
1726              
1727              
1728              int XML_StripWhitespace(
1729                  _Inout_ XML_Elem* elem)
1730              {
1731                  if (elem->type != XML_CHARS)
1732                  {
1733                      return -1;
1734                  }
1735              
1736                  //Strip leading white space
1737                  while (elem->data.size && _IsSpace(*elem->data.data))
1738 krisbash 1.4     {
1739                      elem->data.data++;
1740                      elem->data.size--;
1741                  }
1742                  //Strip trailing white space
1743                  while(elem->data.size && _IsSpace(elem->data.data[elem->data.size-1]))
1744                  {
1745                      elem->data.data[elem->data.size-1] = T('\0');
1746                      elem->data.size--;
1747                  }
1748                  return 0;
1749 mike     1.1 }

ViewCVS 0.9.2