(file) Return to xml.c CVS log (file) (dir) Up to [OMI] / omi / xml / new

   1 krisbash 1.1 /*
   2              **==============================================================================
   3              **
   4              ** Open Management Infrastructure (OMI)
   5              **
   6              ** Copyright (c) Microsoft Corporation
   7              ** 
   8              ** Licensed under the Apache License, Version 2.0 (the "License"); you may not 
   9              ** use this file except in compliance with the License. You may obtain a copy 
  10              ** of the License at 
  11              **
  12              **     http://www.apache.org/licenses/LICENSE-2.0 
  13              **
  14              ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15              ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 
  16              ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 
  17              ** MERCHANTABLITY OR NON-INFRINGEMENT. 
  18              **
  19              ** See the Apache 2 License for the specific language governing permissions 
  20              ** and limitations under the License.
  21              **
  22 krisbash 1.1 **==============================================================================
  23              */
  24              
  25              #if defined(_MSC_VER)
  26              # include <windows.h>
  27              #endif
  28              
  29              #include "xml.h"
  30              #include <string.h>
  31              #include <stdio.h>
  32              #include <stdlib.h>
  33              #include <stdarg.h>
  34              #include <ctype.h>
  35              #ifdef _MSC_VER
  36              #pragma prefast (disable: 28252)
  37              #pragma prefast (disable: 28253)
  38              #endif
  39              #include <wchar.h>
  40              #include <stdarg.h>
  41              
                  /* PRINTF_FORMAT(N,M): on compilers that define __GNUC__ >= 4, tags a function
                   * as printf-like so the arguments starting at parameter M are checked against
                   * the format string in parameter N. Expands to nothing on other compilers.
                   */
  42              #if defined(__GNUC__) && (__GNUC__ >= 4)
  43 krisbash 1.1 # define PRINTF_FORMAT(N,M) __attribute__((format(printf, N, M)))
  44              #else
  45              # define PRINTF_FORMAT(N,M) /* empty */
  46              #endif
  47              
  48              static int XML_vsnprintf(char* buf, size_t size, const char* fmt, va_list ap)
  49              {
  50              #if defined(_MSC_VER)
  51                  return _vsnprintf_s(buf, size, size, fmt, ap);
  52              #else
  53                  return vsnprintf(buf, size, fmt, ap);
  54              #endif
  55              }
  56              
  57              static int XML_snprintf(char* buf, size_t size, const char* fmt, ...)
  58              {
  59                  va_list ap;
  60                  int r;
  61                  memset(&ap, 0, sizeof(ap));
  62                  va_start(ap, fmt);
  63              #if defined(_MSC_VER)
  64 krisbash 1.1     r = _vsnprintf_s(buf, size, size, fmt, ap);
  65              #else
  66                  r = vsnprintf(buf, size, fmt, ap);
  67              #endif
  68                  va_end(ap);
  69              
  70                  return r;
  71              }
  72              
  73              #include <pal/strings.h>
  74              
                  /* Character-width aliases. With CONFIG_ENABLE_WCHAR defined, T("...") yields
                   * a wide literal and the XML_* names map to the wide-character C library
                   * functions; otherwise T("...") is the plain literal and the XML_* names map
                   * to the narrow (char) functions.
                   */
  75              #if defined(CONFIG_ENABLE_WCHAR)
  76              # define T(STR) L##STR
  77              # define XML_strtoul wcstoul
  78              # define XML_strlen wcslen
  79              # define XML_strcmp wcscmp
  80              # define XML_printf wprintf
  81              # define XML_fprintf fwprintf
  82              #else
  83              # define T(STR) STR
  84              # define XML_strtoul strtoul
  85 krisbash 1.1 # define XML_strlen strlen
  86              # define XML_strcmp strcmp
  87              # define XML_printf printf
  88              # define XML_fprintf fprintf
  89              #endif
  90              
  91              // Message identifiers passed as the formatStringId argument of XML_Raise().
                  // The original note here reads "Windows uses these identifiers": this
                  // fallback list is compiled only when _MSC_VER is not defined, so MSVC
                  // builds presumably obtain the identifiers from another header (confirm).
                  // Note that ID_MIUTILS_UNKNOWN and
                  // ID_MIUTILS_XMLPARSER_BAD_ENTITY_REFERENCE share the value 0 here.
  92              #if !defined(_MSC_VER)
  93              # define ID_MIUTILS_UNKNOWN 0
  94              # define ID_MIUTILS_XMLPARSER_BAD_ENTITY_REFERENCE 0
  95              # define ID_MIUTILS_XMLPARSER_BAD_CHARACTER_REFERENCE 1
  96              # define ID_MIUTILS_XMLPARSER_UNDEFINED_NAMESPACE_PREFIX 2
  97              # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_NAME 3
  98              # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_EQUALS 4
  99              # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_OPENING_QUOTES 5
 100              # define ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_CLOSING_QUOTES 6
 101              # define ID_MIUTILS_XMLPARSER_TOO_MANY_NAMESPACES 7
 102              # define ID_MIUTILS_XMLPARSER_TOO_MANY_ATTRIBUTES 8
 103              # define ID_MIUTILS_XMLPARSER_END_OF_XML_INSTRUCTION 9
 104              # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED 10
 105              # define ID_MIUTILS_XMLPARSER_END_OF_INSTRUCTION_MISSING 11
 106 krisbash 1.1 # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END 12
 107              # define ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW 13
 108              # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED 14
 109              # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED_ELEM_END 15
 110              # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END_ELEM_END 16
 111              # define ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED_ELEM_END 17
 112              # define ID_MIUTILS_XMLPARSER_ELEMENT_TOO_MANY_ENDS 18
 113              # define ID_MIUTILS_XMLPARSER_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG 19
 114              # define ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END 20
 115              # define ID_MIUTILS_XMLPARSER_COMMENT_END_EXPECTED 21
 116              # define ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END 22
 117              # define ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END 23
 118              # define ID_MIUTILS_XMLPARSER_CHARDATA_EXPECTED_ELEMENT_END_TAG 24
 119              # define ID_MIUTILS_XMLPARSER_OPEN_ANGLE_BRACKET_EXPECTED 25
 120              # define ID_MIUTILS_XMLPARSER_COMMENT_CDATA_DOCTYPE_EXPECTED 26
 121              # define ID_MIUTILS_XMLPARSER_ELEMENT_EXPECTED 27
 122              # define ID_MIUTILS_XMLPARSER_UNEXPECTED_STATE 28
 123              #endif
 124              
                  /* Forward declaration of the parser's error-reporting routine (defined
                   * outside this part of the file).
                   *
                   *     self            parser instance the error is reported on
                   *     formatStringId  one of the ID_MIUTILS_* message identifiers
                   *     format          printf-style message, followed by its arguments
                   *
                   * PRINTF_FORMAT(3, 4) lets the compiler check the variadic arguments against
                   * 'format'. Callers in this file test self->status after a call, so the
                   * routine presumably records a non-zero status -- confirm at the definition.
                   */
 125              PRINTF_FORMAT(3, 4)
 126              void XML_Raise(
 127 krisbash 1.1     XML* self, 
 128                  unsigned formatStringId, 
 129                  const Char* format,
 130                  ...);
 131              
 132              //extern HMODULE g_hModule; /*From DllMain */
 133              
 134              /*
 135              **==============================================================================
 136              **
 137              ** Local definitions
 138              **
 139              **==============================================================================
 140              */
 141              
                  /* Parser state identifiers. The code that switches on these values is not
                   * in this part of the file; judging by the names they distinguish the start
                   * of the document, being inside a tag, and being inside character data
                   * (confirm against the state machine).
                   */
 142              typedef enum _XML_State
 143              {
 144                  STATE_START,
 145                  STATE_TAG,
 146                  STATE_CHARS,
 147              }
 148 krisbash 1.1 XML_State;
 149              
 150              INLINE int _IsSpace(Char c)
 151              {
 152                  /* Space characters include [\n\t\r ]
 153                   *     _spaceChar['\n'] => 1
 154                   *     _spaceChar['\r'] => 2
 155                   *     _spaceChar['\t'] => 2
 156                   *     _spaceChar[' '] => 2
 157                   */
 158                  static const unsigned char _table[256] =
 159                  {
 160                      0,0,0,0,0,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 161                      2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 162                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 163                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 164                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 165                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 166                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 167                      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 168                  };
 169 krisbash 1.1 
 170                  if (c < 256)
 171                      return _table[(unsigned int)c];
 172                  else
 173                      return 0;
 174              }
 175              
 176              /* Classification table for XML name characters, indexed by character code:
 177               *     2 => [A-Za-z_]  may start a name and may also continue one
 178               *     1 => [0-9.-]    may only continue a name
                   *     0 => not a name character
                   * Note that ':' (58) is 0: a colon is not accepted here, so code scanning
                   * a name with _SkipInner() stops at the prefix separator.
 179               */
 180              static const unsigned char _nameCharTable[256] =
 181              {
 182                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 183                  0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
 184                  0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,2,
 185                  0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,
 186                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 187                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 188                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 189                  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 190 krisbash 1.1 };
 191              
 192              INLINE int _IsFirst(Char c)
 193              {
 194                  if (c < 256)
 195                      return _nameCharTable[(unsigned int)c] & 2;
 196                  else 
 197                      return 0;
 198              }
 199              
 200              INLINE int _IsInner(Char c)
 201              {
 202                  if (c < 256)
 203                      return _nameCharTable[(unsigned char)c];
 204                  else
 205                      return 0;
 206              }
 207              
 208              INLINE Char* _SkipInner(__in_z Char* p)
 209              {
 210                  while (*p && _IsInner(*p))
 211 krisbash 1.1         p++;
 212              
 213                  return p;
 214              }
 215              
 216              static Char* _SkipSpacesAux(__inout XML* self, __in_z Char* p)
 217              {
 218                  Char x;
 219                  size_t n = 0;
 220              
 221                  while (*p && (x = (Char)_IsSpace(*p)) != 0)
 222                  {
 223                      n += 0x01 & x;
 224                      p++;
 225                  }
 226              
 227                  self->line += n;
 228                  return p;
 229              }
 230              
 231              INLINE Char* _SkipSpaces(__inout XML* self, __in_z Char* p)
 232 krisbash 1.1 {
 233                  if ((p[0] == '\0') || !_IsSpace(p[0]))
 234                      return p;
 235              
 236                  if (p[0] == '\n')
 237                      self->line++;
 238              
 239                  if ((p[1] == '\0') || !_IsSpace(p[1]))
 240                      return &p[1];
 241              
 242                  if (p[1] == '\n')
 243                      self->line++;
 244              
 245                  if ((p[2] == '\0') || !_IsSpace(p[2]))
 246                      return &p[2];
 247              
 248                  if (p[2] == '\n')
 249                      self->line++;
 250              
 251                  return _SkipSpacesAux(self, &p[3]);
 252              }
 253 krisbash 1.1 
 254              INLINE Char* _ToEntityRef(
 255                  __inout XML* self, 
 256                  __in_z Char* p, 
 257                  __inout_z Char* ch)
 258              {
 259                  /* Note: we collected the following statistics on the frequency of
 260                   * each entity reference in a large body of XML documents:
 261                   *     
 262                   *     &quot; - 74,480 occurences
 263                   *     &apos; - 13,877 occurences
 264                   *     &lt;   -  9,919 occurences
 265                   *     &gt;   -  9,853 occurences
 266                   *     &amp;  -    111 occurences
 267                   *
 268                   * The cases below are organized in order of statistical frequency.
 269                   */
 270              
 271                  /* Match one of these: "lt;", "gt;", "amp;", "quot;", "apos;" */
 272              
 273                  if (p[0] == 'q' && p[1] == 'u' && p[2] == 'o' && p[3] == 't' && p[4] == ';')
 274 krisbash 1.1     {
 275                      *ch = '"';
 276                      return p + 5;
 277                  }
 278              
 279                  if (p[0] == 'a' && p[1] == 'p' && p[2] == 'o' && p[3] == 's' && p[4] == ';')
 280                  {
 281                      *ch = '\'';
 282                      return p + 5;
 283                  }
 284              
 285                  if (p[0] == 'l' && p[1] == 't' && p[2] == ';')
 286                  {
 287                      *ch = '<';
 288                      return p + 3;
 289                  }
 290              
 291                  if (p[0] == 'g' && p[1] == 't' && p[2] == ';')
 292                  {
 293                      *ch = '>';
 294                      return p + 3;
 295 krisbash 1.1     }
 296              
 297                  if (p[0] == 'a' && p[1] == 'm' && p[2] == 'p' && p[3] == ';')
 298                  {
 299                      *ch = '&';
 300                      return p + 4;
 301                  }
 302              
 303                  XML_Raise(
 304                      self, 
 305                      ID_MIUTILS_XMLPARSER_BAD_ENTITY_REFERENCE,
 306                      "bad entity reference");
 307                  return p;
 308              }
 309              
 310              INLINE Char* _ToCharRef(__inout XML* self, __in_z Char* p, __inout_z Char* ch)
 311              {
 312                  Char* end = NULL;
 313                  unsigned long x;
 314              
 315                  if (*p == 'x')
 316 krisbash 1.1     {
 317                      p++;
 318                      x = XML_strtoul(p, &end, 16);
 319                  }
 320                  else
 321                  {
 322                      x = XML_strtoul(p, &end, 10);
 323                  }
 324              
 325                  if (end == p || *end != ';' || x > 255)
 326                  {
 327                      *ch = '\0';
 328                      XML_Raise(
 329                          self, 
 330                          ID_MIUTILS_XMLPARSER_BAD_CHARACTER_REFERENCE,
 331                          "bad character reference");
 332                      return p;
 333                  }
 334              
 335                  *ch = (Char)x;
 336              
 337 krisbash 1.1     return end + 1;
 338              }
 339              
 340              INLINE Char* _ToRef(__inout XML* self, __in_z Char* p, __inout_z Char* ch)
 341              {
 342                  /* Examples:
 343                   *     &#64;
 344                   *     &xFF;
 345                   *     &amp;
 346                   *     &lt;
 347                   */
 348                  if (*p == '#')
 349                      return _ToCharRef(self, p + 1, ch);
 350                  else
 351                      return _ToEntityRef(self, p, ch);
 352              }
 353              
 354              
 355              static int _Match1(Char c)
 356              {
 357                  /* Matches all but '\0', '\'', '"', and '&'. All matching charcters
 358 krisbash 1.1      * yeild 2, except for '\n', which yields 1 
 359                   */
 360                  static const unsigned char _match[256] =
 361                  {
 362                      0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 363                      1,1,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 364                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 365                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 366                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 367                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 368                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 369                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 370                  };
 371              
 372                  if (c < 256)
 373                      return _match[(unsigned char)c];
 374                  else
 375                      return 1;
 376              }
 377              
 378              /* Reduce entity references and remove leading and trailing whitespace */
 379 krisbash 1.1 static Char* _ReduceAttrValue(
 380                  __inout XML* self, 
 381                  __deref_inout_z Char** pInOut, 
 382                  Char eos)
 383              {
 384                  Char* p = *pInOut;
 385                  Char* end;
 386                  size_t n = 0;
 387              
 388                  /* Skip uninteresting characters */
 389                  for (;;)
 390                  {
 391              #if defined(_MSC_VER)
 392              # pragma prefast(push)
 393              # pragma prefast (disable: 26018)
 394              #endif
 395                      while (*p && _Match1(*p))
 396              #if defined(_MSC_VER)
 397              # pragma prefast(pop)
 398              #endif
 399                          p++;
 400 krisbash 1.1 
 401                      if (*p != '\n')
 402                          break;
 403              
 404                      self->line++;
 405                      p++;
 406                  }
 407              
 408                  end = p;
 409              
 410                  while (*p && *p != eos)
 411                  {
 412                      if (*p == '&')
 413                      {
 414                          Char c = '\0';
 415                          Char* tmp;
 416                          
 417                          p++;
 418                          tmp = _ToRef(self, p, &c);
 419              
 420                          if (self->status)
 421 krisbash 1.1             {
 422                              /* Propagate error */
 423                              return NULL;
 424                          }
 425              
 426                          *end++ = c;
 427                          p = tmp;
 428                      }
 429                      else
 430                      {
 431                          if (*p == '\n')
 432                              n++;
 433              
 434                          *end++ = *p++;
 435                      }
 436                  }
 437              
 438                  *pInOut = p;
 439                  self->line += n;
 440              
 441                  return end;
 442 krisbash 1.1 }
 443              
 444              static int _Match2(Char c)
 445              {
 446                  /* Match all but these: '\0', '<', '&', '\n' */
 447                  static const unsigned char _match[256] =
 448                  {
 449                      0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 450                      1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,
 451                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 452                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 453                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 454                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 455                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 456                      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 457                  };
 458              
 459                  if (c < 256)
 460                      return _match[(unsigned char)c];
 461                  else
 462                      return 1;
 463 krisbash 1.1 }
 464              
 465              /* Reduce character data, advance p, and return pointer to end */
 466              static Char* _ReduceCharData(__inout XML* self, __deref_inout_z Char** pInOut)
 467              {
 468                  Char* p = *pInOut;
 469                  Char* end;
 470                  size_t n = 0;
 471              
 472                  for (;;)
 473                  {
 474              #if defined(_MSC_VER)
 475              # pragma prefast(push)
 476              # pragma prefast (disable: 26018)
 477              #endif
 478                      while (*p && (_Match2(*p)))
 479              #if defined(_MSC_VER)
 480              # pragma prefast(pop)
 481              #endif
 482                          p++;
 483              
 484 krisbash 1.1         if (*p != '\n')
 485                          break;
 486              
 487                      p++;
 488                      self->line++;
 489                  }
 490              
 491                  end = p;
 492              
 493                  /* Can we return now? */
 494                  if (*p == '<')
 495                  {
 496                      *pInOut = p;
 497                      self->line += n;
 498                      return end;
 499                  }
 500              
 501                  /* Seek next tag start */
 502                  while (*p && *p != '<')
 503                  {
 504                      if (*p == '&')
 505 krisbash 1.1         {
 506                          Char c = '\0';
 507                          Char* tmp;
 508                          
 509                          p++;
 510                          tmp = _ToRef(self, p, &c);
 511              
 512                          if (tmp == p)
 513                              return NULL;
 514              
 515                          *end++ = c;
 516                          p = tmp;
 517                      }
 518                      else
 519                      {
 520                          for (;;)
 521                          {
 522                              while (_Match2(*p))
 523                                  *end++ = *p++;
 524              
 525                              if (*p != '\n')
 526 krisbash 1.1                     break;
 527              
 528                              *end++ = *p++;
 529                              self->line++;
 530                          }
 531                      }
 532                  }
 533              
 534                  /* Document cannot end with character data */
 535                  if (*p == '\0')
 536                      return NULL;
 537              
 538                  *pInOut = p;
 539                  self->line += n;
 540              
 541                  return end;
 542              }
 543              
 544              INLINE unsigned int _HashCode(__in_ecount_z(n) const Char* s, size_t n)
 545              {
 546                  /* This hash algorithm excludes the first character since for many strings 
 547 krisbash 1.1      * (e.g., URIs) the first character is not unique. Instead the hash 
 548                   * comprises three components:
 549                   *     (1) The length
 550                   *     (3) The last chacter
 551                   */
 552                  return n ? (int)(n ^ s[n-1]) : 0;
 553              }
 554              
 555              /* Map a URI to a single character namespace identifier */
 556              static Char _FindNamespaceID(
 557                  __inout XML* self, 
 558                  __in_ecount_z(uriSize) const Char* uri,
 559                  size_t uriSize)
 560              {
 561                  size_t i;
 562                  unsigned int code = _HashCode(uri, uriSize);
 563              
 564                  /* Resolve from client namespace registrations */
 565                  for (i = 0; i < self->registeredNameSpacesSize; i++)
 566                  {
 567                      XML_RegisteredNameSpace* rns = &self->registeredNameSpaces[i];
 568 krisbash 1.1 
 569                      if (rns->uriCode == code && XML_strcmp(rns->uri, uri) == 0)
 570                          return rns->id;
 571                  }
 572              
 573                  /* Not found so return null id */
 574                  return '\0';
 575              }
 576              
                  /* Fixed namespace URIs. For each URI: the narrow literal, the T() form that
                   * follows the configured character width, and the length in characters
                   * (sizeof of the narrow literal minus its terminator).
                   */
 577              #define XML_NS "http://www.w3.org/XML/1998/namespace"
 578              #define T_XML_NS T("http://www.w3.org/XML/1998/namespace")
 579              #define XML_NS_LEN (sizeof(XML_NS) - 1)
 580              
 581              #define XMLNS_NS "http://www.w3.org/2000/xmlns/"
 582              #define T_XMLNS_NS T("http://www.w3.org/2000/xmlns/")
 583              #define XMLNS_NS_LEN (sizeof(XMLNS_NS) - 1)
 584              
 585              /* Map a prefix to its XML namespace
 586               * A non-empty prefix that is unmapped results in an error */
 587              static const XML_NameSpace* _FindNamespace(__inout XML* self, __in_z const Char* prefix)
 588              {
 589 krisbash 1.1     static const XML_NameSpace s_empty = {T(""), 0, T(""), 0, '\0', 0};
 590                  static const XML_NameSpace s_xml = {T("xml"), 3 ^ 'l', T_XML_NS, XML_NS_LEN, '\0', 0};
 591                  static const XML_NameSpace s_xmlns = {T("xmlns"), 5 ^ 's', T_XMLNS_NS, XMLNS_NS_LEN, '\0', 0};
 592                  unsigned int code = _HashCode(prefix, XML_strlen(prefix));
 593                  size_t i;
 594              
 595                  /* Special case: the 'xml' and 'xmlns' namespaces are fixed. */
 596                  if (prefix[0] == 'x' &&
 597                      prefix[1] == 'm' &&
 598                      prefix[2] == 'l')
 599                  {
 600                      if (prefix[3] == '\0')
 601                          return &s_xml;
 602                      else if (prefix[3] == 'n' &&
 603                               prefix[4] == 's' &&
 604                               prefix[5] == '\0')
 605                          return &s_xmlns;
 606                  }
 607              
 608                  /* First check single entry cache */
 609                  if (self->nameSpacesCacheIndex != (size_t)-1)
 610 krisbash 1.1     {
 611                      const XML_NameSpace* ns = &self->nameSpaces[self->nameSpacesCacheIndex];
 612              
 613                      if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
 614                      {
 615                          return ns;
 616                      }
 617                  }
 618              
 619                  /* Translate name to the one found in the nameSpaces[] array */
 620                  for (i = self->nameSpacesSize; i--; )
 621                  {
 622                      const XML_NameSpace* ns = &self->nameSpaces[i];
 623              
 624                      if (ns->nameCode == code && XML_strcmp(ns->name, prefix) == 0)
 625                      {
 626                          /* Cache */
 627                          self->nameSpacesCacheIndex = i;
 628              
 629                          return ns;
 630                      }
 631 krisbash 1.1     }
 632              
 633                  /* No match
 634                   * For the empty prefix this means there is no namespace
 635                   * Otherwise, this is an error */
 636                  if (prefix[0])
 637                  {
 638                      XML_Raise(self, ID_MIUTILS_XMLPARSER_UNDEFINED_NAMESPACE_PREFIX, 
 639                          "undefined namespace prefix: %s", prefix);
 640                      return NULL;
 641                  }
 642              
 643                  return &s_empty;
 644              }
 645              
 646              static void _ParseAttr(
 647                  __inout XML* self, 
 648                  __inout XML_Elem* elem, 
 649                  __deref_inout_z Char** pInOut)
 650              {
 651                  Char* p = *pInOut;
 652 krisbash 1.1     Char* name;
 653                  Char* nameEnd;
 654                  Char* value;
 655                  Char* valueEnd;
 656                  Char* colon = NULL;
 657                  Char* tag = p;
 658                  Char* prefix = T("");
 659              
 660                  /* Parse the attribute name */
 661                  {
 662                      name = p;
 663              
 664                      if (!_IsFirst(*p))
 665                      {
 666                          XML_Raise(
 667                              self, 
 668                              ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_NAME,
 669                              "expected attribute name");
 670                          return;
 671                      }
 672              
 673 krisbash 1.1         p++;
 674              
 675              #if defined(_MSC_VER)
 676              # pragma prefast(push)
 677              # pragma prefast (disable: 26018)
 678              #endif
 679                      p = _SkipInner(p);
 680              #if defined(_MSC_VER)
 681              # pragma prefast(pop)
 682              #endif
 683              
 684                      if (*p == ':')
 685                      {
 686                          colon = p++;
 687                          p = _SkipInner(p);
 688                      }
 689              
 690                      nameEnd = p;
 691                  }
 692              
 693                  /* Seek the quote character (position p beyond quote) */
 694 krisbash 1.1     {
 695                      /* Skip spaces */
 696                      p = _SkipSpaces(self, p);
 697              
 698                      /* Expect a '=' character */
 699                      if (*p++ != '=')
 700                      {
 701                          *nameEnd = '\0';
 702                          XML_Raise(
 703                              self, 
 704                              ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_EQUALS, 
 705                              "expected '=' character after '%s'", 
 706                              name);
 707                          return;
 708                      }
 709                  }
 710              
 711                  /* Null-terminate name now that we are beyond the '=' */
 712                  *nameEnd = '\0';
 713              
 714                  /* Skip spaces */
 715 krisbash 1.1     p = _SkipSpaces(self, p);
 716              
 717                  /* Parse the value */
 718                  {
 719                      Char quote;
 720              
 721                      /* Expect opening quote */
 722                      if (*p != '"' && *p != '\'')
 723                      {
 724                          XML_Raise(
 725                              self, 
 726                              ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_OPENING_QUOTES, 
 727                              "expected opening quote after %s", 
 728                              name);
 729                          return;
 730                      }
 731              
 732                      quote = *p++;
 733                      value = p;
 734              
 735                      valueEnd = _ReduceAttrValue(self, &p, quote);
 736 krisbash 1.1 
 737                      if (self->status)
 738                      {
 739                          /* Propagate error */
 740                          return;
 741                      }
 742              
 743                      /* Expect closing quote */
 744                      if (*p++ != quote)
 745                      {
 746                          XML_Raise(
 747                              self, 
 748                              ID_MIUTILS_XMLPARSER_EXPECTED_ATTRIBUTE_CLOSING_QUOTES, 
 749                              "expected closing quote after '%s'",
 750                              name);
 751                          return;
 752                      }
 753              
 754                      /* Null-terminate the value */
 755                      *valueEnd = '\0';
 756                  }
 757 krisbash 1.1 
 758                  /* Skip spaces */
 759                  p = _SkipSpaces(self, p);
 760              
 761                  /* Separate the prefix and tag */
 762                  if (colon)
 763                  {
 764                      *colon = '\0';
 765                      tag = colon + 1;
 766                      prefix = name;
 767                  }
 768                  /* else prefix is "" and name is the tag */
 769              
 770                  /* If the first/only token is "xmlns", extract namespace */
 771                  if (name[0] == 'x' &&
 772                      name[1] == 'm' &&
 773                      name[2] == 'l' &&
 774                      name[3] == 'n' &&
 775                      name[4] == 's' &&
 776                      name[5] == '\0')
 777                  {
 778 krisbash 1.1         unsigned int tagCode = 0;
 779              
 780                      /* The namespace of the xmlns:x attribute is fixed */
 781                      prefix = T("xmlns");
 782              
 783                      if (colon)
 784                      {
 785                          /* For non-default namespaces */
 786                          tagCode = _HashCode(tag, nameEnd - tag);
 787                      }
 788              
 789                      /* Add new namespace entry */
 790                      {
 791                          /* Check for stack overflow */
 792                          if (self->nameSpacesSize == XML_MAX_NAMESPACES)
 793                          {
 794                              XML_Raise(
 795                                  self, 
 796                                  ID_MIUTILS_XMLPARSER_TOO_MANY_NAMESPACES,
 797                                  "too many namespaces: (>%u)",
 798                                  (int)XML_MAX_NAMESPACES);
 799 krisbash 1.1                 return;
 800                          }
 801                          {
 802                              XML_NameSpace* newNs = &self->nameSpaces[self->nameSpacesSize++];
 803                              newNs->name = colon ? tag : T("");
 804                              newNs->nameCode = tagCode;
 805                              newNs->id = _FindNamespaceID(self, value, valueEnd - value);
 806                              newNs->uri = value;
 807                              newNs->uriSize = valueEnd - value;
 808                              newNs->depth = self->stackSize;
 809                          }
 810                      }
 811                  }
 812              
 813                  /* Append attribute to element */
 814                  {
 815                      XML_Attr* attr;
 816              
 817                      /* Check for attribute array overflow */
 818                      if (elem->attrsSize == XML_MAX_ATTRIBUTES)
 819                      {
 820 krisbash 1.1             elem->data[elem->size] = 0;
 821              
 822                          XML_Raise(
 823                              self, 
 824                              ID_MIUTILS_XMLPARSER_TOO_MANY_ATTRIBUTES, 
 825                              "too many attributes '%s': (>%u)",
 826                              elem->data, 
 827                              (int)XML_MAX_ATTRIBUTES);
 828                          return;
 829                      }
 830              
 831                      attr = &elem->attrs[elem->attrsSize++];
 832                      attr->name = tag;
 833                      attr->nameSize = nameEnd - tag;
 834                      attr->value = value;
 835                      attr->valueSize = valueEnd - value;
 836              
 837                      /* Save the namespace prefix, which will be translated by the caller */
 838                      attr->namespaceUri = prefix;
 839                  }
 840              
 841 krisbash 1.1     *pInOut = p;
 842              }
 843              
 844              static void _ParseProcessingInstruction(
 845                  __inout XML* self, 
 846                  __inout XML_Elem* elem, 
 847                  __in_z Char* p)
 848              {
 849                  /* <?xml version="1.0" encoding="UTF-8" standalone="yes"?> */
 850                  Char* start;
 851                  Char* end;
 852              
 853                  /* Advance past '?' character */
 854                  if (*p)
 855                  {
 856                      p++;
 857                  }
 858              
 859                  /* Get tag identifier */
 860                  {
 861                      start = p;
 862 krisbash 1.1 
 863                      if (*p)
 864                      {
 865                          p = _SkipInner(p);
 866                      }
 867              
 868                      if (*p == ':')
 869                      {
 870                          p++;
 871                          if (*p)
 872                          {
 873                              p = _SkipInner(p);
 874                          }
 875                      }
 876              
 877                      /* If input exhuasted */
 878                      if (*p == '\0')
 879                      {
 880                          XML_Raise(
 881                              self, 
 882                              ID_MIUTILS_XMLPARSER_END_OF_XML_INSTRUCTION,
 883 krisbash 1.1                 "premature end of input");
 884                          return;
 885                      }
 886              
 887                      end = p;
 888                  }
 889              
 890                  /* Skip spaces */
 891                  if (*p)
 892                  {
 893                      p = _SkipSpaces(self, p);
 894                  }
 895              
 896                  elem->type = XML_INSTRUCTION;
 897                  elem->data = start;
 898                  elem->size = end - start;
 899              
 900                  /* Process attributes */
 901              
 902                  while (*p && *p != '?')
 903                  {
 904 krisbash 1.1         _ParseAttr(self, elem, &p);
 905              
 906                      if (self->status)
 907                      {
 908                          /* Propagate error */
 909                          return;
 910                      }
 911                  }
 912              
 913                  if (*p)
 914                  {
 915                      p++;
 916                  }
 917              
 918                  /* Skip spaces */
 919                  if (*p)
 920                  {
 921                      p = _SkipSpaces(self, p);
 922                  }
 923              
 924                  /* Expect '>' */
 925 krisbash 1.1     if (*p++ != '>')
 926                  {
 927                      XML_Raise(
 928                          self, 
 929                          ID_MIUTILS_XMLPARSER_END_OF_INSTRUCTION_MISSING,
 930                          "expected closing angle bracket");
 931                      return;
 932                  }
 933              
 934                  /* Return element object */
 935                  elem->namespaceUri = T("");
 936                  elem->namespaceUriSize = 0;
 937                  elem->namespaceId = '\0';
 938                  *end = '\0';
 939              
 940                  self->ptr = p;
 941              
 942                  if (self->foundRoot)
 943                      self->state = STATE_CHARS;
 944                  else
 945                      self->state = STATE_START;
 946 krisbash 1.1 }
 947              
 948              static void _ParseStartTag(
 949                  __inout XML* self, 
 950                  __inout XML_Elem* elem, 
 951                  __in_z Char* p)
 952              {
 953                  Char* name;
 954                  Char* nameEnd;
 955                  Char* colon = NULL;
 956                  Char* prefix = T("");
 957                  const XML_NameSpace* ns;
 958                  size_t attr;
 959              
 960                  /* Found the root */
 961                  self->foundRoot = 1;
 962              
 963                  /* Get tag identifier */
 964                  {
 965                      name = p;
 966              
 967 krisbash 1.1         if (!*p || !_IsFirst(*p++))
 968                      {
 969                          XML_Raise(
 970                              self, 
 971                              ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED,
 972                              "expected element name");
 973                          return;
 974                      }
 975              
 976                      if (*p)
 977                      {
 978                          p = _SkipInner(p);
 979                      }
 980              
 981                      if (*p == ':')
 982                      {
 983                          colon = p++;
 984                          if (*p)
 985                          {
 986                              p = _SkipInner(p);
 987                          }
 988 krisbash 1.1         }
 989              
 990                      /* If input exhuasted */
 991                      if (*p == '\0')
 992                      {
 993                          XML_Raise(
 994                              self, 
 995                              ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END,
 996                              "premature end of input");
 997                          return;
 998                      }
 999              
1000                      nameEnd = p;
1001                  }
1002              
1003                  if (colon)
1004                  {
1005                      *colon = '\0';
1006                      prefix = name;
1007                      name = colon + 1;
1008                  }
1009 krisbash 1.1 
1010                  /* Skip spaces */
1011                  p = _SkipSpaces(self, p);
1012              
1013                  elem->type = XML_START;
1014                  elem->data = name;
1015                  elem->size = nameEnd - name;
1016              
1017                  /* Process attributes */
1018                  while (*p && *p != '/' && *p != '>')
1019                  {
1020                      _ParseAttr(self, elem, &p);
1021              
1022                      if (self->status)
1023                          return;
1024                  }
1025              
1026                  /* Translate the namespace after parsing xmlns attributes */
1027                  ns = _FindNamespace(self, prefix);
1028              
1029                  if (self->status)
1030 krisbash 1.1         return;
1031              
1032                  /* Now translate the attribute namespaces */
1033                  /* Unprefixed attributes inherit from the element */
1034                  for (attr = 0; attr < elem->attrsSize; attr++)
1035                  {
1036                      XML_Attr* item = elem->attrs + attr;
1037                      const XML_NameSpace* itemNS = ns;
1038              
1039                      if (item->namespaceUri[0] != '\0')
1040                      {
1041                          /* The namespaceUri field contains the prefix */
1042                          itemNS = _FindNamespace(self, item->namespaceUri);
1043              
1044                          if (self->status)
1045                              return;
1046                      }
1047              
1048                      item->namespaceUri = itemNS->uri;
1049                      item->namespaceUriSize = itemNS->uriSize;
1050                      item->namespaceId = itemNS->id;
1051 krisbash 1.1     }
1052              
1053                  /* Create the element */
1054                  elem->type = XML_START;
1055                  elem->data = name;
1056                  elem->size = nameEnd - name;
1057                  elem->namespaceUri = ns->uri;
1058                  elem->namespaceUriSize = ns->uriSize;
1059                  elem->namespaceId = ns->id;
1060              
1061                  /* Check for empty tag */
1062                  if (*p == '/')
1063                  {
1064                      p++;
1065              
1066                      /* Null-terminate the tag */
1067                      *nameEnd = '\0';
1068              
1069                      /* Inject an empty tag onto element stack */
1070                      {
1071                          /* Check for stack overflow */
1072 krisbash 1.1             if (self->elemStackSize == XML_MAX_NESTED)
1073                          {
1074                              XML_Raise(
1075                                  self, 
1076                                  ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW, 
1077                                  "element stack overflow (>%u)",
1078                                  (int)XML_MAX_NESTED);
1079                              return;
1080                          }
1081              
1082                          self->elemStack[self->elemStackSize] = *elem;
1083                          self->elemStack[self->elemStackSize].type = XML_END;
1084                          self->elemStackSize++;
1085                          self->nesting++;
1086                      }
1087              
1088                      /* Skip space */
1089                      if (*p)
1090                      {
1091                          p = _SkipSpaces(self, p);
1092                      }
1093 krisbash 1.1 
1094                      /* Expect '>' */
1095                      if (*p++ != '>')
1096                      {
1097                          *nameEnd = '\0';
1098                          XML_Raise(
1099                              self, 
1100                              ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED, 
1101                              "expected closing angle bracket: %s",
1102                              elem->data);
1103                          return;
1104                      }
1105              
1106                      self->ptr = p;
1107                      self->state = STATE_CHARS;
1108                      return;
1109                  }
1110              
1111                  /* Expect '>' */
1112                  if (*p++ != '>')
1113                  {
1114 krisbash 1.1         *nameEnd = '\0';
1115                      XML_Raise(
1116                          self, 
1117                          ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED, 
1118                          "expected closing angle bracket: '%s'",
1119                          elem->data);
1120                      return;
1121                  }
1122              
1123                  /* Zero-terminate the name tag */
1124                  *nameEnd = '\0';
1125              
1126                  /* Push opening tag */
1127                  {
1128                      if (self->stackSize == XML_MAX_NESTED)
1129                      {
1130                          XML_Raise(
1131                              self, 
1132                              ID_MIUTILS_XMLPARSER_ELEMENT_DEPTH_OVERFLOW, 
1133                              "element stack overflow (>%u)",
1134                              (int)XML_MAX_NESTED);
1135 krisbash 1.1             return;
1136                      }
1137              
1138                      {
1139                          XML_Name tmp;
1140                          tmp.data = elem->data;
1141                          tmp.size = elem->size;
1142                          tmp.namespaceUri = elem->namespaceUri;
1143                          tmp.namespaceUriSize = elem->namespaceUriSize;
1144                          tmp.namespaceId = elem->namespaceId;
1145              
1146                          self->stack[self->stackSize] = tmp;
1147                          self->stackSize++;
1148                          self->nesting++;
1149                      }
1150                  }
1151              
1152                  self->ptr = p;
1153              
1154                  if (self->foundRoot)
1155                      self->state = STATE_CHARS;
1156 krisbash 1.1     else
1157                      self->state = STATE_START;
1158              }
1159              
1160              static void _ParseEndTag(
1161                  __inout XML* self, 
1162                  __inout XML_Elem* elem, 
1163                  __in_z Char* p)
1164              {
1165                  /* Closing element: </name> */
1166                  Char* name;
1167                  Char* nameEnd;
1168                  Char* colon = NULL;
1169                  Char* prefix = T("");
1170                  const XML_NameSpace *ns;
1171              
1172                  if (*p)
1173                  {
1174                      p++;
1175                  }
1176              
1177 krisbash 1.1     /* Skip space */
1178                  if (*p)
1179                  {
1180                      p = _SkipSpaces(self, p);
1181                  }
1182              
1183                  name = p;
1184              
1185                  /* Skip name */
1186                  {
1187                      if (!*p || !_IsFirst(*p++))
1188                      {
1189                          XML_Raise(
1190                              self, 
1191                              ID_MIUTILS_XMLPARSER_ELEMENT_NAME_EXPECTED_ELEM_END,
1192                              "expected end tag");
1193                          return;
1194                      }
1195              
1196                      if (*p)
1197                      {
1198 krisbash 1.1             p = _SkipInner(p);
1199                      }
1200              
1201                      if (*p == ':')
1202                      {
1203                          colon = p++;
1204                          if (*p)
1205                          {
1206                              p = _SkipInner(p);
1207                          }
1208                      }
1209                  }
1210              
1211                  /* If input exhuasted */
1212                  if (*p == '\0')
1213                  {
1214                      XML_Raise(
1215                          self, 
1216                          ID_MIUTILS_XMLPARSER_ELEMENT_NAME_PREMATURE_END_ELEM_END,
1217                          "premature end of input");
1218                      return;
1219 krisbash 1.1     }
1220              
1221                  nameEnd = p;
1222              
1223                  if (colon)
1224                  {
1225                      *colon = '\0';
1226                      prefix = name;
1227                      name = colon + 1;
1228                  }
1229              
1230                  /* Skip spaces */
1231                  p = _SkipSpaces(self, p);
1232              
1233                  /* Expect '>' */
1234                  if (*p++ != '>')
1235                  {
1236                      XML_Raise(
1237                          self,
1238                          ID_MIUTILS_XMLPARSER_ELEMENT_NAME_NOT_CLOSED_ELEM_END,
1239                          "expected closing angle bracket");
1240 krisbash 1.1         return;
1241                  }
1242              
1243                  /* Null terminate name */
1244                  *nameEnd = '\0';
1245              
1246                  ns = _FindNamespace(self, prefix);
1247              
1248                  if (self->status)
1249                      return;
1250              
1251                  /* Return element object */
1252                  elem->type = XML_END;
1253                  elem->data = name;
1254                  elem->size = nameEnd - name;
1255                  elem->namespaceUri = ns->uri;
1256                  elem->namespaceUriSize = ns->uriSize;
1257                  elem->namespaceId = ns->id;
1258              
1259                  /* Match opening name */
1260                  {
1261 krisbash 1.1         /* Check for stack underflow */
1262                      if (self->stackSize-- == 0)
1263                      {
1264                          XML_Raise(
1265                              self, 
1266                              ID_MIUTILS_XMLPARSER_ELEMENT_TOO_MANY_ENDS, 
1267                              "too many closing tags: %s",
1268                              name);
1269                          return;
1270                      }
1271              
1272                      self->nesting--;
1273              
1274                      /* Check that closing name matches opening name */
1275                      {
1276                          XML_Name* xn = &self->stack[self->stackSize];
1277              
1278                          if (XML_strcmp(xn->data, name) != 0 ||
1279                              xn->namespaceId != ns->id ||
1280                              (ns->id == 0 && XML_strcmp(xn->namespaceUri, ns->uri) != 0))
1281                          {
1282 krisbash 1.1                 XML_Raise(
1283                                  self, 
1284                                  ID_MIUTILS_XMLPARSER_ELEMENT_END_ELEMENT_TAG_NOT_MATCH_START_TAG, 
1285                                  "open/close tag mismatch: %s/%s", 
1286                                  self->stack[self->stackSize].data, 
1287                                  name);
1288                              return;
1289                          }
1290                      }
1291                  }
1292              
1293                  /* Remove namespaces that have just gone out of scope */
1294                  {
1295                      size_t i;
1296                      size_t n = 0;
1297              
1298                      for (i = self->nameSpacesSize; i--; )
1299                      {
1300                          if (self->nameSpaces[i].depth >= self->stackSize)
1301                              n++;
1302                      }
1303 krisbash 1.1 
1304                      if (n)
1305                      {
1306                          self->nameSpacesSize -= n;
1307              
1308                          /* Clear single-entry cache */
1309                          if (self->nameSpacesCacheIndex >= self->nameSpacesSize)
1310                              self->nameSpacesCacheIndex = (size_t)-1;
1311                      }
1312                  }
1313              
1314                  /* Set next state */
1315                  self->ptr = p;
1316                  self->state = STATE_CHARS;
1317              }
1318              
1319              static void _ParseComment(
1320                  __inout XML* self, 
1321                  __inout XML_Elem* elem, 
1322                  __in_z Char* p)
1323              {
1324 krisbash 1.1     /* Comment: <!-- blah blah blah --> */
1325                  Char* start;
1326                  Char* end;
1327              
1328                  if (!p[0] || !p[1])
1329                  {
1330                      XML_Raise(
1331                          self, 
1332                          ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END,
1333                          "premature end of comment");
1334                      return;
1335                  }
1336                  p += 2;
1337                  start = p;
1338              
1339                  while (*p)
1340                  {
1341                      if (p[0] == '-' && p[1] == '-')
1342                      {
1343                          if (p[2] != '>')
1344                          {
1345 krisbash 1.1                 XML_Raise(
1346                                  self, 
1347                                  ID_MIUTILS_XMLPARSER_COMMENT_END_EXPECTED,
1348                                  "expected closing comment");
1349                              return;
1350                          }
1351              
1352                          /* Null-terminate this comment */
1353                          end = p;
1354                          p += 3;
1355              
1356                          /* Prepare element */
1357                          elem->type = XML_COMMENT;
1358                          elem->data = start;
1359                          elem->size = end - start;
1360                          elem->namespaceUri = T("");
1361                          elem->namespaceUriSize = 0;
1362                          elem->namespaceId = '\0';
1363                          *end = '\0';
1364              
1365                          /* Set next state */
1366 krisbash 1.1             self->ptr = p;
1367              
1368                          if (self->foundRoot)
1369                              self->state = STATE_CHARS;
1370                          else
1371                              self->state = STATE_START;
1372              
1373                          return;
1374                      }
1375                      else if (p[0] == '\n')
1376                          self->line++;
1377              
1378                      p++;
1379                  }
1380              
1381                  XML_Raise(
1382                      self, 
1383                      ID_MIUTILS_XMLPARSER_COMMENT_PREMATURE_END,
1384                      "malformed comment");
1385              }
1386              
1387 krisbash 1.1 static void _ParseCDATA(
1388                  __inout XML* self, 
1389                  __inout XML_Elem* elem, 
1390                  __in_z Char* p)
1391              {
1392                  /* <![CDATA[...]]> */
1393                  Char* start;
1394                  Char* end;
1395              
1396                  /* Recognize <!DOCTYPE ...> */
1397                  if (!p[0] || !p[1] || !p[2] || !p[3] || !p[4] || !p[5] || !p[6])
1398                  {
1399                      XML_Raise(
1400                          self, 
1401                          ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END,
1402                          "premature end of CDATA");
1403                      return;
1404                  }
1405                  p += 7;
1406                  start = p;
1407              
1408 krisbash 1.1     while (*p)
1409                  {
1410                      if (p[0] == ']' && p[1] == ']' && p[2] == '>')
1411                      {
1412                          end = p;
1413                          p += 3;
1414              
1415                          /* Prepare element */
1416                          elem->type = XML_CHARS;
1417                          elem->data = start;
1418                          elem->size = end - start;
1419                          elem->namespaceUri = T("");
1420                          elem->namespaceUriSize = 0;
1421                          elem->namespaceId = '\0';
1422                          *end = '\0';
1423              
1424                          /* Set next state */
1425                          self->ptr = p;
1426                          self->state = STATE_CHARS;
1427              
1428                          return;
1429 krisbash 1.1 
1430                      }
1431                      else if (p[0] == '\n')
1432                          self->line++;
1433              
1434                      p++;
1435                  }
1436              
1437                  XML_Raise(
1438                      self, 
1439                      ID_MIUTILS_XMLPARSER_CDATA_PREMATURE_END,
1440                      "unterminated CDATA section");
1441                  return;
1442              }
1443              
1444              static void _ParseDOCTYPE(
1445                  __inout XML* self, 
1446                  __inout XML_Elem* elem, 
1447                  __in_z Char* p)
1448              {
1449                  MI_UNUSED(elem);
1450 krisbash 1.1 
1451                  /* Recognize <!DOCTYPE ...> */
1452                  if (!p[0] || !p[1] || !p[2] || !p[3] || !p[4] || !p[5] || !p[6])
1453                  {
1454                      XML_Raise(self, 
1455                          ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END,
1456                          "premature end of DOCTYPE");
1457                      return;
1458                  }
1459                  p += 7;
1460              
1461                  while (*p != '\0' && *p != '>')
1462                  {
1463                      if (p[0] == '\n')
1464                          self->line++;
1465              
1466                      p++;
1467                  }
1468              
1469                  if (*p++ != '>')
1470                  {
1471 krisbash 1.1         XML_Raise(
1472                          self, 
1473                          ID_MIUTILS_XMLPARSER_DOCTYPE_PREMATURE_END,
1474                          "premature end of DOCTYPE");
1475                      return;
1476                  }
1477              
1478                  /* Set next state */
1479                  self->ptr = p;
1480              
1481                  if (self->foundRoot)
1482                      self->state = STATE_CHARS;
1483                  else
1484                      self->state = STATE_START;
1485              }
1486              
1487              static int _ParseCharData(
1488                  __inout XML* self, 
1489                  __inout XML_Elem* elem, 
1490                  __in_z Char* p)
1491              {
1492 krisbash 1.1     Char* start;
1493                  Char* end;
1494              
1495              #if 0
1496                  pca: Need function to strip leading and trailing white space on a needed basis
1497                  /* Skip leading spaces */
1498                  p = _SkipSpaces(self, p);
1499              #endif
1500                  /* Reject input if it does appear inside tags */
1501                  if (self->stackSize == 0)
1502                  {
1503                      /* Finished parsing document */
1504                      self->status = 1;
1505                      self->ptr = p;
1506                      return 0;
1507                  }
1508              
1509              #if 0
1510                  pca: Need function to strip leading and trailing white space on a needed basis
1511                  /* Remove leading spaces */
1512                  p = _SkipSpaces(self, p);
1513 krisbash 1.1 #endif
1514              
1515                  if (*p == '<')
1516                  {
1517                      self->ptr = p + 1;
1518                      self->state = STATE_TAG;
1519                      return 0;
1520                  }
1521              
1522                  /* Save pointer to start of data */
1523                  start = p;
1524              
1525                  /* reduce character data */
1526                  end = _ReduceCharData(self, &p);
1527              
1528                  if (self->status)
1529                  {
1530                      /* Propagate error */
1531                      return 0;
1532                  }
1533              
1534 krisbash 1.1     /* Process character data */
1535                  if (*p != '<')
1536                  {
1537                      XML_Raise(
1538                          self, 
1539                          ID_MIUTILS_XMLPARSER_CHARDATA_EXPECTED_ELEMENT_END_TAG,
1540                          "expcted opening angle bracket");
1541                      return 0;
1542                  }
1543              
1544              #if 0
1545                  pca: Need function to strip leading and trailing white space on a needed basis
1546                  /* Remove trailing spaces (the newlines have already been counted) */
1547                  {
1548                      /* Remove trainling spaces from the character data */
1549                      start[-1] = '\0';
1550              
1551                      while (_IsSpace(end[-1]))
1552                          end--;
1553                  }
1554              #endif
1555 krisbash 1.1 
1556                  /* Set next state */
1557                  self->ptr = p + 1;
1558                  self->state = STATE_TAG;
1559              
1560                  /* Return character data element if non-empty */
1561                  if (end == start)
1562                      return 0;
1563              
1564                  /* Prepare element */
1565                  *end = '\0';
1566                  elem->type = XML_CHARS;
1567                  elem->data = start;
1568                  elem->size = end - start;
1569                  elem->namespaceUri = T("");
1570                  elem->namespaceUriSize = 0;
1571                  elem->namespaceId = '\0';
1572              
1573              #if 1
1574                  XML_StripWhitespace(elem);
1575              #endif
1576 krisbash 1.1 
1577                  /* Return 1 to indicate non-empty element */
1578                  return 1;
1579              }
1580              
1581              /*
1582              **==============================================================================
1583              **
1584              ** Public definitions
1585              **
1586              **==============================================================================
1587              */
1588              
1589              const Char* XML_Elem_GetAttr(
1590                  __inout XML_Elem* self,
1591                  __in_z const Char* name)
1592              {
1593                  size_t i;
1594              
1595                  for (i = 0; i < self->attrsSize; i++)
1596                  {
1597 krisbash 1.1         if (XML_strcmp(name, self->attrs[i].name) == 0)
1598                          return self->attrs[i].value;
1599                  }
1600              
1601                  /* Not found! */
1602                  return NULL;
1603              }
1604              
1605              void XML_Init(
1606                  __out XML* self)
1607              {
1608                  memset(self, 0, sizeof(XML));
1609              
1610                  self->nameSpacesCacheIndex = (size_t)-1;
1611              }
1612              
1613              void XML_SetText(
1614                  __inout XML* self,
1615                  __in_z Char* text)
1616              {
1617                  self->text = text;
1618 krisbash 1.1     self->ptr = text;
1619                  self->line = 1;
1620                  self->state = STATE_START;
1621              }
1622              
1623              int XML_Next(
1624                  __inout XML* self,
1625                  __out XML_Elem* elem)
1626              {
1627                  if (self->elemStackSize)
1628                  {
1629                      *elem = self->elemStack[--self->elemStackSize];
1630                      self->nesting--;
1631                      return 0;
1632                  }
1633              
1634                  elem->attrsSize = 0;
1635              
1636                  for (;;)
1637                  {
1638                      switch (self->state)
1639 krisbash 1.1         {
1640                          case STATE_START:
1641                          {
1642                              Char* p = self->ptr;
1643              
1644                              /* Skip spaces */
1645                              p = _SkipSpaces(self, p);
1646              
1647                              /* Expect '<' */
1648                              if (*p != '<')
1649                              {
1650                                  XML_Raise(
1651                                      self, 
1652                                      ID_MIUTILS_XMLPARSER_OPEN_ANGLE_BRACKET_EXPECTED,
1653                                      "expected open angle bracket");
1654                                  return -1;
1655                              }
1656              
1657                              self->ptr = p + 1;
1658                              self->state = STATE_TAG;
1659                              break;
1660 krisbash 1.1             }
1661                          case STATE_TAG:
1662                          {
1663                              Char* p = self->ptr;
1664              
1665                              /* Skip spaces */
1666                              p = _SkipSpaces(self, p);
1667              
1668                              /* Expect one of these */
1669                              if (*p == '/')
1670                              {
1671                                  _ParseEndTag(self, elem, p);
1672                                  return self->status;
1673                              }
1674                              else if (_IsFirst(*p))
1675                              {
1676                                  _ParseStartTag(self, elem, p);
1677                                  return self->status;
1678                              }
1679                              else if (*p == '?')
1680                              {
1681 krisbash 1.1                     _ParseProcessingInstruction(self, elem, p);
1682                                  return self->status;
1683                              }
1684                              else if (*p == '!')
1685                              {
1686                                  p++;
1687              
1688                                  if (p[0] == '-' && p[1] == '-')
1689                                  {
1690                                      _ParseComment(self, elem, p);
1691                                      return self->status;
1692                                  }
1693                                  else if (memcmp(p, T("[CDATA["), 7 * sizeof(Char)) == 0)
1694                                  {
1695                                      _ParseCDATA(self, elem, p);
1696                                      return self->status;
1697                                  }
1698                                  else if (memcmp(p, T("DOCTYPE"), 7 * sizeof(Char)) == 0)
1699                                  {
1700                                      _ParseDOCTYPE(self, elem, p);
1701              
1702 krisbash 1.1                         if (self->status)
1703                                          return -1;
1704              
1705                                      break;
1706                                  }
1707                                  else
1708                                  {
1709                                      XML_Raise(
1710                                          self, 
1711                                          ID_MIUTILS_XMLPARSER_COMMENT_CDATA_DOCTYPE_EXPECTED,
1712                                          "expected comment, CDATA, or DOCTYPE");
1713                                      return -1;
1714                                  }
1715                              }
1716                              else
1717                              {
1718                                  XML_Raise(
1719                                      self, 
1720                                      ID_MIUTILS_XMLPARSER_ELEMENT_EXPECTED,
1721                                      "expected element");
1722                                  return-1;
1723 krisbash 1.1                 }
1724                              break;
1725                          }
1726                          case STATE_CHARS:
1727                          {
1728                              Char* p = self->ptr;
1729              
1730                              if (_ParseCharData(self, elem, p) == 1)
1731                              {
1732                                  /* Return character data to caller */
1733                                  return 0;
1734                              }
1735              
1736                              if (self->status)
1737                                  return self->status;
1738              
1739                              /* empty character data */
1740                              break;
1741                          }
1742                          default:
1743                          {
1744 krisbash 1.1                 XML_Raise(
1745                                  self, 
1746                                  ID_MIUTILS_XMLPARSER_UNEXPECTED_STATE,
1747                                  "unexpected state");
1748                              return -1;
1749                          }
1750                      }
1751                  }
1752              
1753                  //return 0;
1754              }
1755              
1756              int XML_Expect(
1757                  __inout XML* self,
1758                  __out XML_Elem* elem,
1759                  XML_Type type,
1760                  Char knownNamespaceID,   /* 0 means not well known */
1761                  __in_z_opt const Char * namespaceURI, /* NULL if expectedNamespace is not 0, otherwise real URL */
1762                  __in_z const Char* name)
1763              {
1764                  if (XML_Next(self, elem) == 0 && 
1765 krisbash 1.1         elem->type == type && 
1766                      (!name || XML_strcmp(elem->data, name) == 0))
1767                  {
1768                      if (!name)
1769                      {
1770                          return 0; /* No name means no namespace! */
1771                      }
1772                      if (knownNamespaceID)
1773                      {
1774                          if ( knownNamespaceID == elem->namespaceId)
1775                          {
1776                              return 0; /* Known namespace IDs the same */
1777                          }
1778                          XML_Raise(
1779                              self, 
1780                              ID_MIUTILS_UNKNOWN,
1781                              T("Namespace URI IDs do not match: <%c>: %c"), 
1782                              knownNamespaceID, 
1783                              elem->namespaceId);
1784                          return -1;
1785                      }
1786 krisbash 1.1         else
1787                      {
1788                          if (namespaceURI && elem->namespaceUri && 
1789                              (XML_strcmp(namespaceURI, elem->namespaceUri)== 0))
1790                          {
1791                              /* Non-standard namespace URIs match */
1792                              return 0;
1793                          }
1794                          XML_Raise(self, 
1795                              ID_MIUTILS_UNKNOWN,
1796                              T("Namespace URIs do not match: <%s>: %s"), 
1797                              namespaceURI, 
1798                              elem->namespaceUri);
1799                          return -1;
1800                      }
1801                  }
1802              
1803                  if (type == XML_START)
1804                      XML_Raise(
1805                          self, 
1806                          ID_MIUTILS_UNKNOWN,
1807 krisbash 1.1             T("expected element: <%s>: %s"), 
1808                          name, 
1809                          elem->data);
1810                  else if (type == XML_END)
1811                  {
1812                      XML_Raise(
1813                          self, 
1814                          ID_MIUTILS_UNKNOWN,
1815                          T("expected element: </%s>: %s"), 
1816                          name, 
1817                          elem->data);
1818                  }
1819                  else if (type == XML_CHARS)
1820                  {
1821                      XML_Raise(
1822                          self, 
1823                          ID_MIUTILS_UNKNOWN,
1824                          T("expected character data"));
1825                  }
1826              
1827                  return -1;
1828 krisbash 1.1 }
1829              
1830              int XML_Skip(
1831                  __inout XML* self)
1832              {
1833                  XML_Elem tmp;
1834                  size_t nesting = self->nesting;
1835              
1836                  while (self->nesting >= nesting)
1837                  {
1838                      if (XML_Next(self, &tmp) != 0)
1839                          return -1;
1840                  }
1841              
1842                  return 0;
1843              }
1844              
1845              int XML_RegisterNameSpace(
1846                  __inout XML* self,
1847                  Char id,
1848                  __in_z const Char* uri)
1849 krisbash 1.1 {
1850                  XML_RegisteredNameSpace rns;
1851                  /* ATTN: we do not check for duplicates */
1852              
1853                  /* Reject out of range ids */
1854                  if (id < 'a' || id > 'z')
1855                      return -1;
1856              
1857                  /* Check for overflow of the array */
1858                  if (self->registeredNameSpacesSize == XML_MAX_REGISTERED_NAMESPACES)
1859                      return -1;
1860              
1861                  rns.id = id;
1862                  rns.uri = uri;
1863                  rns.uriCode = _HashCode(uri, XML_strlen(uri));
1864              
1865                  self->registeredNameSpaces[self->registeredNameSpacesSize++] = rns;
1866              
1867                  return 0;
1868              }
1869              
1870 krisbash 1.1 int XML_PutBack(
1871                  __inout XML* self,
1872                  __in const XML_Elem* elem)
1873              {
1874                  /* Check for stack overflow */
1875                  if (self->elemStackSize == XML_MAX_NESTED)
1876                  {
1877                      XML_Raise(
1878                          self, 
1879                          ID_MIUTILS_UNKNOWN,
1880                          T("element stack overflow (>%u)"), 
1881                          XML_MAX_NESTED);
1882                      return -1;
1883                  }
1884              
1885                  self->elemStack[self->elemStackSize++] = *elem;
1886                  return 0;
1887              }
1888              
1889              void XML_PutError(__inout XML* self)
1890              {
1891 krisbash 1.1     if (self->status == -1)
1892                  {
1893                      XML_fprintf(stderr, T("line %u: %s\n"), (int)self->line, self->message);
1894                  }
1895              }
1896              
1897              void XML_Raise(
1898                  XML* self, 
1899                  unsigned formatStringId, 
1900                  const Char* format,
1901                  ...)
1902              {
1903              #if defined(CONFIG_ENABLE_WCHAR)
1904              # error "implement this!"
1905              #else
1906                  int n;
1907                  va_list ap;
1908                  memset(&ap, 0, sizeof(ap));
1909              
1910                  self->status = -1;
1911                  self->message[0] = '\0';
1912 krisbash 1.1 
1913                  va_start(ap, format);
1914                  n = XML_vsnprintf(self->message, sizeof(self->message), format, ap);
1915                  va_end(ap);
1916              #endif
1917              }
1918              
1919              void XML_FormatError(
1920                  __inout XML* self, 
1921                  __out_ecount_z(size) Char* format, 
1922                  size_t size)
1923              {
1924                  *format = '\0';
1925              
1926                  if (self->status == -1)
1927                  {
1928                      XML_snprintf(
1929                          format, 
1930                          size, 
1931                          T("%u: error: %s"), 
1932                          (unsigned int)self->line, 
1933 krisbash 1.1             self->message);
1934                  }
1935              }
1936              
1937              int XML_StripWhitespace(
1938                  __inout XML_Elem* elem)
1939              {
1940                  if (elem->type != XML_CHARS)
1941                  {
1942                      return 0;
1943                  }
1944              
1945                  //Strip leading white space
1946                  while (elem->size && _IsSpace(*elem->data))
1947                  {
1948                      elem->data++;
1949                      elem->size--;
1950                  }
1951                  //Strip trailing white space
1952                  while(elem->size && _IsSpace(elem->data[elem->size-1]))
1953                  {
1954 krisbash 1.1         elem->data[elem->size-1] = T('\0');
1955                      elem->size--;
1956                  }
1957                  return 0;
1958              }

ViewCVS 0.9.2