core/expat-2.0.1/lib/xmltok_impl.c
changeset 300 b6d834208d33
parent 296 ea3dbc023c80
equal deleted inserted replaced
299:f06c5eba524d 300:b6d834208d33
       
     1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
       
     2    See the file COPYING for copying permission.
       
     3 */
       
     4 
       
     5 /* This file is included! */
       
     6 #ifdef XML_TOK_IMPL_C
       
     7 
       
     /* Fallback for includers that supply no validity predicate: the check is
        the constant 0, so code built on it never rejects a sequence. */
     #if !defined(IS_INVALID_CHAR)
     #define IS_INVALID_CHAR(enc, ptr, n) (0)
     #endif
       
    11 
       
    /* Expands to the `case BT_LEAD<n>:` arm of a switch for an n-byte
       sequence starting at ptr.  `enc` and `end` are NOT parameters: they
       must be in scope at the expansion site.  n must be a bare literal
       token because it is pasted onto BT_LEAD.
         - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR
         - IS_INVALID_CHAR is true: stores ptr in *nextTokPtr and returns
           XML_TOK_INVALID
         - otherwise: advances ptr by n and leaves the switch */
    #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
        case BT_LEAD ## n: \
          if (end - (ptr) < n) \
            return XML_TOK_PARTIAL_CHAR; \
          if (IS_INVALID_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
          } \
          (ptr) += n; \
          break;

    /* Expands to the switch arms shared by scanners that must reject bad
       input: the 2-, 3- and 4-byte lead cases above, plus BT_NONXML,
       BT_MALFORM and BT_TRAIL, which store ptr in *nextTokPtr and return
       XML_TOK_INVALID. */
    #define INVALID_CASES(ptr, nextTokPtr) \
      INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
      INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
      INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
      case BT_NONXML: \
      case BT_MALFORM: \
      case BT_TRAIL: \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID;
       
    32 
       
    /* Expands to the `case BT_LEAD<n>:` arm for an n-byte sequence inside a
       name.  n must be a bare literal token (it is pasted onto BT_LEAD).
         - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR
         - IS_NAME_CHAR is false: stores ptr in *nextTokPtr and returns
           XML_TOK_INVALID
         - otherwise: advances ptr by n and leaves the switch */
    #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
       case BT_LEAD ## n: \
         if ((end) - (ptr) < n) \
           return XML_TOK_PARTIAL_CHAR; \
         if (!IS_NAME_CHAR(enc, ptr, n)) { \
           *(nextTokPtr) = (ptr); \
           return XML_TOK_INVALID; \
         } \
         (ptr) += n; \
         break;

    /* Expands to every switch arm that accepts one name character at ptr and
       steps over it.  BT_NONASCII is accepted only if IS_NAME_CHAR_MINBPC
       agrees; BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS are accepted
       unconditionally; the multi-byte lead cases defer to CHECK_NAME_CASE. */
    #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
      case BT_NONASCII: \
        if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
          *(nextTokPtr) = (ptr); \
          return XML_TOK_INVALID; \
        } \
        /* fall through */ \
      case BT_NMSTRT: \
      case BT_HEX: \
      case BT_DIGIT: \
      case BT_NAME: \
      case BT_MINUS: \
        (ptr) += MINBPC(enc); \
        break; \
      CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
      CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
      CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
       
    60 
       
    /* Expands to the `case BT_LEAD<n>:` arm for an n-byte sequence at the
       start of a name.  n must be a bare literal token (it is pasted onto
       BT_LEAD).
         - fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR
         - IS_NMSTRT_CHAR is false: stores ptr in *nextTokPtr and returns
           XML_TOK_INVALID
         - otherwise: advances ptr by n and leaves the switch */
    #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
       case BT_LEAD ## n: \
         if ((end) - (ptr) < n) \
           return XML_TOK_PARTIAL_CHAR; \
         if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
           *(nextTokPtr) = (ptr); \
           return XML_TOK_INVALID; \
         } \
         (ptr) += n; \
         break;

    /* Expands to every switch arm that accepts one name-start character at
       ptr and steps over it.  BT_NONASCII is accepted only if
       IS_NMSTRT_CHAR_MINBPC agrees; BT_NMSTRT and BT_HEX are accepted
       unconditionally; the multi-byte lead cases defer to CHECK_NMSTRT_CASE. */
    #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
      case BT_NONASCII: \
        if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
          *(nextTokPtr) = (ptr); \
          return XML_TOK_INVALID; \
        } \
        /* fall through */ \
      case BT_NMSTRT: \
      case BT_HEX: \
        (ptr) += MINBPC(enc); \
        break; \
      CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
      CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
      CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
       
    85 
       
    /* Default when the includer defines no PREFIX: function names are used
       exactly as written. */
    #if !defined(PREFIX)
    #define PREFIX(ident) ident
    #endif
       
    89 
       
    90 /* ptr points to character following "<!-" */
       
    91 
       
    92 static int PTRCALL
       
    93 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
       
    94                     const char *end, const char **nextTokPtr)
       
    95 {
       
    96   if (ptr != end) {
       
    97     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
       
    98       *nextTokPtr = ptr;
       
    99       return XML_TOK_INVALID;
       
   100     }
       
   101     ptr += MINBPC(enc);
       
   102     while (ptr != end) {
       
   103       switch (BYTE_TYPE(enc, ptr)) {
       
   104       INVALID_CASES(ptr, nextTokPtr)
       
   105       case BT_MINUS:
       
   106         if ((ptr += MINBPC(enc)) == end)
       
   107           return XML_TOK_PARTIAL;
       
   108         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
       
   109           if ((ptr += MINBPC(enc)) == end)
       
   110             return XML_TOK_PARTIAL;
       
   111           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   112             *nextTokPtr = ptr;
       
   113             return XML_TOK_INVALID;
       
   114           }
       
   115           *nextTokPtr = ptr + MINBPC(enc);
       
   116           return XML_TOK_COMMENT;
       
   117         }
       
   118         break;
       
   119       default:
       
   120         ptr += MINBPC(enc);
       
   121         break;
       
   122       }
       
   123     }
       
   124   }
       
   125   return XML_TOK_PARTIAL;
       
   126 }
       
   127 
       
   128 /* ptr points to character following "<!" */
       
   129 
       
   130 static int PTRCALL
       
   131 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
       
   132                  const char *end, const char **nextTokPtr)
       
   133 {
       
   134   if (ptr == end)
       
   135     return XML_TOK_PARTIAL;
       
   136   switch (BYTE_TYPE(enc, ptr)) {
       
   137   case BT_MINUS:
       
   138     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   139   case BT_LSQB:
       
   140     *nextTokPtr = ptr + MINBPC(enc);
       
   141     return XML_TOK_COND_SECT_OPEN;
       
   142   case BT_NMSTRT:
       
   143   case BT_HEX:
       
   144     ptr += MINBPC(enc);
       
   145     break;
       
   146   default:
       
   147     *nextTokPtr = ptr;
       
   148     return XML_TOK_INVALID;
       
   149   }
       
   150   while (ptr != end) {
       
   151     switch (BYTE_TYPE(enc, ptr)) {
       
   152     case BT_PERCNT:
       
   153       if (ptr + MINBPC(enc) == end)
       
   154         return XML_TOK_PARTIAL;
       
   155       /* don't allow <!ENTITY% foo "whatever"> */
       
   156       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
       
   157       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
       
   158         *nextTokPtr = ptr;
       
   159         return XML_TOK_INVALID;
       
   160       }
       
   161       /* fall through */
       
   162     case BT_S: case BT_CR: case BT_LF:
       
   163       *nextTokPtr = ptr;
       
   164       return XML_TOK_DECL_OPEN;
       
   165     case BT_NMSTRT:
       
   166     case BT_HEX:
       
   167       ptr += MINBPC(enc);
       
   168       break;
       
   169     default:
       
   170       *nextTokPtr = ptr;
       
   171       return XML_TOK_INVALID;
       
   172     }
       
   173   }
       
   174   return XML_TOK_PARTIAL;
       
   175 }
       
   176 
       
   177 static int PTRCALL
       
   178 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
       
   179                       const char *end, int *tokPtr)
       
   180 {
       
   181   int upper = 0;
       
   182   *tokPtr = XML_TOK_PI;
       
   183   if (end - ptr != MINBPC(enc)*3)
       
   184     return 1;
       
   185   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   186   case ASCII_x:
       
   187     break;
       
   188   case ASCII_X:
       
   189     upper = 1;
       
   190     break;
       
   191   default:
       
   192     return 1;
       
   193   }
       
   194   ptr += MINBPC(enc);
       
   195   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   196   case ASCII_m:
       
   197     break;
       
   198   case ASCII_M:
       
   199     upper = 1;
       
   200     break;
       
   201   default:
       
   202     return 1;
       
   203   }
       
   204   ptr += MINBPC(enc);
       
   205   switch (BYTE_TO_ASCII(enc, ptr)) {
       
   206   case ASCII_l:
       
   207     break;
       
   208   case ASCII_L:
       
   209     upper = 1;
       
   210     break;
       
   211   default:
       
   212     return 1;
       
   213   }
       
   214   if (upper)
       
   215     return 0;
       
   216   *tokPtr = XML_TOK_XML_DECL;
       
   217   return 1;
       
   218 }
       
   219 
       
   220 /* ptr points to character following "<?" */
       
   221 
       
   222 static int PTRCALL
       
   223 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
       
   224                const char *end, const char **nextTokPtr)
       
   225 {
       
   226   int tok;
       
   227   const char *target = ptr;
       
   228   if (ptr == end)
       
   229     return XML_TOK_PARTIAL;
       
   230   switch (BYTE_TYPE(enc, ptr)) {
       
   231   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   232   default:
       
   233     *nextTokPtr = ptr;
       
   234     return XML_TOK_INVALID;
       
   235   }
       
   236   while (ptr != end) {
       
   237     switch (BYTE_TYPE(enc, ptr)) {
       
   238     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   239     case BT_S: case BT_CR: case BT_LF:
       
   240       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
       
   241         *nextTokPtr = ptr;
       
   242         return XML_TOK_INVALID;
       
   243       }
       
   244       ptr += MINBPC(enc);
       
   245       while (ptr != end) {
       
   246         switch (BYTE_TYPE(enc, ptr)) {
       
   247         INVALID_CASES(ptr, nextTokPtr)
       
   248         case BT_QUEST:
       
   249           ptr += MINBPC(enc);
       
   250           if (ptr == end)
       
   251             return XML_TOK_PARTIAL;
       
   252           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   253             *nextTokPtr = ptr + MINBPC(enc);
       
   254             return tok;
       
   255           }
       
   256           break;
       
   257         default:
       
   258           ptr += MINBPC(enc);
       
   259           break;
       
   260         }
       
   261       }
       
   262       return XML_TOK_PARTIAL;
       
   263     case BT_QUEST:
       
   264       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
       
   265         *nextTokPtr = ptr;
       
   266         return XML_TOK_INVALID;
       
   267       }
       
   268       ptr += MINBPC(enc);
       
   269       if (ptr == end)
       
   270         return XML_TOK_PARTIAL;
       
   271       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   272         *nextTokPtr = ptr + MINBPC(enc);
       
   273         return tok;
       
   274       }
       
   275       /* fall through */
       
   276     default:
       
   277       *nextTokPtr = ptr;
       
   278       return XML_TOK_INVALID;
       
   279     }
       
   280   }
       
   281   return XML_TOK_PARTIAL;
       
   282 }
       
   283 
       
   284 static int PTRCALL
       
   285 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
       
   286                          const char *end, const char **nextTokPtr)
       
   287 {
       
   288   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
       
   289                                      ASCII_T, ASCII_A, ASCII_LSQB };
       
   290   int i;
       
   291   /* CDATA[ */
       
   292   if (end - ptr < 6 * MINBPC(enc))
       
   293     return XML_TOK_PARTIAL;
       
   294   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
       
   295     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
       
   296       *nextTokPtr = ptr;
       
   297       return XML_TOK_INVALID;
       
   298     }
       
   299   }
       
   300   *nextTokPtr = ptr;
       
   301   return XML_TOK_CDATA_SECT_OPEN;
       
   302 }
       
   303 
       
   304 static int PTRCALL
       
   305 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
       
   306                         const char *end, const char **nextTokPtr)
       
   307 {
       
   308   if (ptr == end)
       
   309     return XML_TOK_NONE;
       
   310   if (MINBPC(enc) > 1) {
       
   311     size_t n = end - ptr;
       
   312     if (n & (MINBPC(enc) - 1)) {
       
   313       n &= ~(MINBPC(enc) - 1);
       
   314       if (n == 0)
       
   315         return XML_TOK_PARTIAL;
       
   316       end = ptr + n;
       
   317     }
       
   318   }
       
   319   switch (BYTE_TYPE(enc, ptr)) {
       
   320   case BT_RSQB:
       
   321     ptr += MINBPC(enc);
       
   322     if (ptr == end)
       
   323       return XML_TOK_PARTIAL;
       
   324     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       
   325       break;
       
   326     ptr += MINBPC(enc);
       
   327     if (ptr == end)
       
   328       return XML_TOK_PARTIAL;
       
   329     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   330       ptr -= MINBPC(enc);
       
   331       break;
       
   332     }
       
   333     *nextTokPtr = ptr + MINBPC(enc);
       
   334     return XML_TOK_CDATA_SECT_CLOSE;
       
   335   case BT_CR:
       
   336     ptr += MINBPC(enc);
       
   337     if (ptr == end)
       
   338       return XML_TOK_PARTIAL;
       
   339     if (BYTE_TYPE(enc, ptr) == BT_LF)
       
   340       ptr += MINBPC(enc);
       
   341     *nextTokPtr = ptr;
       
   342     return XML_TOK_DATA_NEWLINE;
       
   343   case BT_LF:
       
   344     *nextTokPtr = ptr + MINBPC(enc);
       
   345     return XML_TOK_DATA_NEWLINE;
       
   346   INVALID_CASES(ptr, nextTokPtr)
       
   347   default:
       
   348     ptr += MINBPC(enc);
       
   349     break;
       
   350   }
       
   351   while (ptr != end) {
       
   352     switch (BYTE_TYPE(enc, ptr)) {
       
   353 #define LEAD_CASE(n) \
       
   354     case BT_LEAD ## n: \
       
   355       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
       
   356         *nextTokPtr = ptr; \
       
   357         return XML_TOK_DATA_CHARS; \
       
   358       } \
       
   359       ptr += n; \
       
   360       break;
       
   361     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
   362 #undef LEAD_CASE
       
   363     case BT_NONXML:
       
   364     case BT_MALFORM:
       
   365     case BT_TRAIL:
       
   366     case BT_CR:
       
   367     case BT_LF:
       
   368     case BT_RSQB:
       
   369       *nextTokPtr = ptr;
       
   370       return XML_TOK_DATA_CHARS;
       
   371     default:
       
   372       ptr += MINBPC(enc);
       
   373       break;
       
   374     }
       
   375   }
       
   376   *nextTokPtr = ptr;
       
   377   return XML_TOK_DATA_CHARS;
       
   378 }
       
   379 
       
   380 /* ptr points to character following "</" */
       
   381 
       
   382 static int PTRCALL
       
   383 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
       
   384                    const char *end, const char **nextTokPtr)
       
   385 {
       
   386   if (ptr == end)
       
   387     return XML_TOK_PARTIAL;
       
   388   switch (BYTE_TYPE(enc, ptr)) {
       
   389   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   390   default:
       
   391     *nextTokPtr = ptr;
       
   392     return XML_TOK_INVALID;
       
   393   }
       
   394   while (ptr != end) {
       
   395     switch (BYTE_TYPE(enc, ptr)) {
       
   396     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   397     case BT_S: case BT_CR: case BT_LF:
       
   398       for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   399         switch (BYTE_TYPE(enc, ptr)) {
       
   400         case BT_S: case BT_CR: case BT_LF:
       
   401           break;
       
   402         case BT_GT:
       
   403           *nextTokPtr = ptr + MINBPC(enc);
       
   404           return XML_TOK_END_TAG;
       
   405         default:
       
   406           *nextTokPtr = ptr;
       
   407           return XML_TOK_INVALID;
       
   408         }
       
   409       }
       
   410       return XML_TOK_PARTIAL;
       
   411 #ifdef XML_NS
       
   412     case BT_COLON:
       
   413       /* no need to check qname syntax here,
       
   414          since end-tag must match exactly */
       
   415       ptr += MINBPC(enc);
       
   416       break;
       
   417 #endif
       
   418     case BT_GT:
       
   419       *nextTokPtr = ptr + MINBPC(enc);
       
   420       return XML_TOK_END_TAG;
       
   421     default:
       
   422       *nextTokPtr = ptr;
       
   423       return XML_TOK_INVALID;
       
   424     }
       
   425   }
       
   426   return XML_TOK_PARTIAL;
       
   427 }
       
   428 
       
   429 /* ptr points to character following "&#X" */
       
   430 
       
   431 static int PTRCALL
       
   432 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
       
   433                        const char *end, const char **nextTokPtr)
       
   434 {
       
   435   if (ptr != end) {
       
   436     switch (BYTE_TYPE(enc, ptr)) {
       
   437     case BT_DIGIT:
       
   438     case BT_HEX:
       
   439       break;
       
   440     default:
       
   441       *nextTokPtr = ptr;
       
   442       return XML_TOK_INVALID;
       
   443     }
       
   444     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   445       switch (BYTE_TYPE(enc, ptr)) {
       
   446       case BT_DIGIT:
       
   447       case BT_HEX:
       
   448         break;
       
   449       case BT_SEMI:
       
   450         *nextTokPtr = ptr + MINBPC(enc);
       
   451         return XML_TOK_CHAR_REF;
       
   452       default:
       
   453         *nextTokPtr = ptr;
       
   454         return XML_TOK_INVALID;
       
   455       }
       
   456     }
       
   457   }
       
   458   return XML_TOK_PARTIAL;
       
   459 }
       
   460 
       
   461 /* ptr points to character following "&#" */
       
   462 
       
   463 static int PTRCALL
       
   464 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
       
   465                     const char *end, const char **nextTokPtr)
       
   466 {
       
   467   if (ptr != end) {
       
   468     if (CHAR_MATCHES(enc, ptr, ASCII_x))
       
   469       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   470     switch (BYTE_TYPE(enc, ptr)) {
       
   471     case BT_DIGIT:
       
   472       break;
       
   473     default:
       
   474       *nextTokPtr = ptr;
       
   475       return XML_TOK_INVALID;
       
   476     }
       
   477     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
       
   478       switch (BYTE_TYPE(enc, ptr)) {
       
   479       case BT_DIGIT:
       
   480         break;
       
   481       case BT_SEMI:
       
   482         *nextTokPtr = ptr + MINBPC(enc);
       
   483         return XML_TOK_CHAR_REF;
       
   484       default:
       
   485         *nextTokPtr = ptr;
       
   486         return XML_TOK_INVALID;
       
   487       }
       
   488     }
       
   489   }
       
   490   return XML_TOK_PARTIAL;
       
   491 }
       
   492 
       
   493 /* ptr points to character following "&" */
       
   494 
       
   495 static int PTRCALL
       
   496 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
       
   497                 const char **nextTokPtr)
       
   498 {
       
   499   if (ptr == end)
       
   500     return XML_TOK_PARTIAL;
       
   501   switch (BYTE_TYPE(enc, ptr)) {
       
   502   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   503   case BT_NUM:
       
   504     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   505   default:
       
   506     *nextTokPtr = ptr;
       
   507     return XML_TOK_INVALID;
       
   508   }
       
   509   while (ptr != end) {
       
   510     switch (BYTE_TYPE(enc, ptr)) {
       
   511     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   512     case BT_SEMI:
       
   513       *nextTokPtr = ptr + MINBPC(enc);
       
   514       return XML_TOK_ENTITY_REF;
       
   515     default:
       
   516       *nextTokPtr = ptr;
       
   517       return XML_TOK_INVALID;
       
   518     }
       
   519   }
       
   520   return XML_TOK_PARTIAL;
       
   521 }
       
   522 
       
   523 /* ptr points to character following first character of attribute name */
       
   524 
       
   525 static int PTRCALL
       
   526 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
       
   527                  const char **nextTokPtr)
       
   528 {
       
   529 #ifdef XML_NS
       
   530   int hadColon = 0;
       
   531 #endif
       
   532   while (ptr != end) {
       
   533     switch (BYTE_TYPE(enc, ptr)) {
       
   534     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   535 #ifdef XML_NS
       
   536     case BT_COLON:
       
   537       if (hadColon) {
       
   538         *nextTokPtr = ptr;
       
   539         return XML_TOK_INVALID;
       
   540       }
       
   541       hadColon = 1;
       
   542       ptr += MINBPC(enc);
       
   543       if (ptr == end)
       
   544         return XML_TOK_PARTIAL;
       
   545       switch (BYTE_TYPE(enc, ptr)) {
       
   546       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   547       default:
       
   548         *nextTokPtr = ptr;
       
   549         return XML_TOK_INVALID;
       
   550       }
       
   551       break;
       
   552 #endif
       
   553     case BT_S: case BT_CR: case BT_LF:
       
   554       for (;;) {
       
   555         int t;
       
   556 
       
   557         ptr += MINBPC(enc);
       
   558         if (ptr == end)
       
   559           return XML_TOK_PARTIAL;
       
   560         t = BYTE_TYPE(enc, ptr);
       
   561         if (t == BT_EQUALS)
       
   562           break;
       
   563         switch (t) {
       
   564         case BT_S:
       
   565         case BT_LF:
       
   566         case BT_CR:
       
   567           break;
       
   568         default:
       
   569           *nextTokPtr = ptr;
       
   570           return XML_TOK_INVALID;
       
   571         }
       
   572       }
       
   573     /* fall through */
       
   574     case BT_EQUALS:
       
   575       {
       
   576         int open;
       
   577 #ifdef XML_NS
       
   578         hadColon = 0;
       
   579 #endif
       
   580         for (;;) {
       
   581           ptr += MINBPC(enc);
       
   582           if (ptr == end)
       
   583             return XML_TOK_PARTIAL;
       
   584           open = BYTE_TYPE(enc, ptr);
       
   585           if (open == BT_QUOT || open == BT_APOS)
       
   586             break;
       
   587           switch (open) {
       
   588           case BT_S:
       
   589           case BT_LF:
       
   590           case BT_CR:
       
   591             break;
       
   592           default:
       
   593             *nextTokPtr = ptr;
       
   594             return XML_TOK_INVALID;
       
   595           }
       
   596         }
       
   597         ptr += MINBPC(enc);
       
   598         /* in attribute value */
       
   599         for (;;) {
       
   600           int t;
       
   601           if (ptr == end)
       
   602             return XML_TOK_PARTIAL;
       
   603           t = BYTE_TYPE(enc, ptr);
       
   604           if (t == open)
       
   605             break;
       
   606           switch (t) {
       
   607           INVALID_CASES(ptr, nextTokPtr)
       
   608           case BT_AMP:
       
   609             {
       
   610               int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
       
   611               if (tok <= 0) {
       
   612                 if (tok == XML_TOK_INVALID)
       
   613                   *nextTokPtr = ptr;
       
   614                 return tok;
       
   615               }
       
   616               break;
       
   617             }
       
   618           case BT_LT:
       
   619             *nextTokPtr = ptr;
       
   620             return XML_TOK_INVALID;
       
   621           default:
       
   622             ptr += MINBPC(enc);
       
   623             break;
       
   624           }
       
   625         }
       
   626         ptr += MINBPC(enc);
       
   627         if (ptr == end)
       
   628           return XML_TOK_PARTIAL;
       
   629         switch (BYTE_TYPE(enc, ptr)) {
       
   630         case BT_S:
       
   631         case BT_CR:
       
   632         case BT_LF:
       
   633           break;
       
   634         case BT_SOL:
       
   635           goto sol;
       
   636         case BT_GT:
       
   637           goto gt;
       
   638         default:
       
   639           *nextTokPtr = ptr;
       
   640           return XML_TOK_INVALID;
       
   641         }
       
   642         /* ptr points to closing quote */
       
   643         for (;;) {
       
   644           ptr += MINBPC(enc);
       
   645           if (ptr == end)
       
   646             return XML_TOK_PARTIAL;
       
   647           switch (BYTE_TYPE(enc, ptr)) {
       
   648           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   649           case BT_S: case BT_CR: case BT_LF:
       
   650             continue;
       
   651           case BT_GT:
       
   652           gt:
       
   653             *nextTokPtr = ptr + MINBPC(enc);
       
   654             return XML_TOK_START_TAG_WITH_ATTS;
       
   655           case BT_SOL:
       
   656           sol:
       
   657             ptr += MINBPC(enc);
       
   658             if (ptr == end)
       
   659               return XML_TOK_PARTIAL;
       
   660             if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   661               *nextTokPtr = ptr;
       
   662               return XML_TOK_INVALID;
       
   663             }
       
   664             *nextTokPtr = ptr + MINBPC(enc);
       
   665             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
       
   666           default:
       
   667             *nextTokPtr = ptr;
       
   668             return XML_TOK_INVALID;
       
   669           }
       
   670           break;
       
   671         }
       
   672         break;
       
   673       }
       
   674     default:
       
   675       *nextTokPtr = ptr;
       
   676       return XML_TOK_INVALID;
       
   677     }
       
   678   }
       
   679   return XML_TOK_PARTIAL;
       
   680 }
       
   681 
       
   682 /* ptr points to character following "<" */
       
   683 
       
   684 static int PTRCALL
       
   685 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
       
   686                const char **nextTokPtr)
       
   687 {
       
   688 #ifdef XML_NS
       
   689   int hadColon;
       
   690 #endif
       
   691   if (ptr == end)
       
   692     return XML_TOK_PARTIAL;
       
   693   switch (BYTE_TYPE(enc, ptr)) {
       
   694   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   695   case BT_EXCL:
       
   696     if ((ptr += MINBPC(enc)) == end)
       
   697       return XML_TOK_PARTIAL;
       
   698     switch (BYTE_TYPE(enc, ptr)) {
       
   699     case BT_MINUS:
       
   700       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   701     case BT_LSQB:
       
   702       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
       
   703                                       end, nextTokPtr);
       
   704     }
       
   705     *nextTokPtr = ptr;
       
   706     return XML_TOK_INVALID;
       
   707   case BT_QUEST:
       
   708     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   709   case BT_SOL:
       
   710     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   711   default:
       
   712     *nextTokPtr = ptr;
       
   713     return XML_TOK_INVALID;
       
   714   }
       
   715 #ifdef XML_NS
       
   716   hadColon = 0;
       
   717 #endif
       
   718   /* we have a start-tag */
       
   719   while (ptr != end) {
       
   720     switch (BYTE_TYPE(enc, ptr)) {
       
   721     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   722 #ifdef XML_NS
       
   723     case BT_COLON:
       
   724       if (hadColon) {
       
   725         *nextTokPtr = ptr;
       
   726         return XML_TOK_INVALID;
       
   727       }
       
   728       hadColon = 1;
       
   729       ptr += MINBPC(enc);
       
   730       if (ptr == end)
       
   731         return XML_TOK_PARTIAL;
       
   732       switch (BYTE_TYPE(enc, ptr)) {
       
   733       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   734       default:
       
   735         *nextTokPtr = ptr;
       
   736         return XML_TOK_INVALID;
       
   737       }
       
   738       break;
       
   739 #endif
       
   740     case BT_S: case BT_CR: case BT_LF:
       
   741       {
       
   742         ptr += MINBPC(enc);
       
   743         while (ptr != end) {
       
   744           switch (BYTE_TYPE(enc, ptr)) {
       
   745           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   746           case BT_GT:
       
   747             goto gt;
       
   748           case BT_SOL:
       
   749             goto sol;
       
   750           case BT_S: case BT_CR: case BT_LF:
       
   751             ptr += MINBPC(enc);
       
   752             continue;
       
   753           default:
       
   754             *nextTokPtr = ptr;
       
   755             return XML_TOK_INVALID;
       
   756           }
       
   757           return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
       
   758         }
       
   759         return XML_TOK_PARTIAL;
       
   760       }
       
   761     case BT_GT:
       
   762     gt:
       
   763       *nextTokPtr = ptr + MINBPC(enc);
       
   764       return XML_TOK_START_TAG_NO_ATTS;
       
   765     case BT_SOL:
       
   766     sol:
       
   767       ptr += MINBPC(enc);
       
   768       if (ptr == end)
       
   769         return XML_TOK_PARTIAL;
       
   770       if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   771         *nextTokPtr = ptr;
       
   772         return XML_TOK_INVALID;
       
   773       }
       
   774       *nextTokPtr = ptr + MINBPC(enc);
       
   775       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
       
   776     default:
       
   777       *nextTokPtr = ptr;
       
   778       return XML_TOK_INVALID;
       
   779     }
       
   780   }
       
   781   return XML_TOK_PARTIAL;
       
   782 }
       
   783 
       
   784 static int PTRCALL
       
   785 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
       
   786                    const char **nextTokPtr)
       
   787 {
       
   788   if (ptr == end)
       
   789     return XML_TOK_NONE;
       
   790   if (MINBPC(enc) > 1) {
       
   791     size_t n = end - ptr;
       
   792     if (n & (MINBPC(enc) - 1)) {
       
   793       n &= ~(MINBPC(enc) - 1);
       
   794       if (n == 0)
       
   795         return XML_TOK_PARTIAL;
       
   796       end = ptr + n;
       
   797     }
       
   798   }
       
   799   switch (BYTE_TYPE(enc, ptr)) {
       
   800   case BT_LT:
       
   801     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   802   case BT_AMP:
       
   803     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   804   case BT_CR:
       
   805     ptr += MINBPC(enc);
       
   806     if (ptr == end)
       
   807       return XML_TOK_TRAILING_CR;
       
   808     if (BYTE_TYPE(enc, ptr) == BT_LF)
       
   809       ptr += MINBPC(enc);
       
   810     *nextTokPtr = ptr;
       
   811     return XML_TOK_DATA_NEWLINE;
       
   812   case BT_LF:
       
   813     *nextTokPtr = ptr + MINBPC(enc);
       
   814     return XML_TOK_DATA_NEWLINE;
       
   815   case BT_RSQB:
       
   816     ptr += MINBPC(enc);
       
   817     if (ptr == end)
       
   818       return XML_TOK_TRAILING_RSQB;
       
   819     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
       
   820       break;
       
   821     ptr += MINBPC(enc);
       
   822     if (ptr == end)
       
   823       return XML_TOK_TRAILING_RSQB;
       
   824     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
   825       ptr -= MINBPC(enc);
       
   826       break;
       
   827     }
       
   828     *nextTokPtr = ptr;
       
   829     return XML_TOK_INVALID;
       
   830   INVALID_CASES(ptr, nextTokPtr)
       
   831   default:
       
   832     ptr += MINBPC(enc);
       
   833     break;
       
   834   }
       
   835   while (ptr != end) {
       
   836     switch (BYTE_TYPE(enc, ptr)) {
       
   837 #define LEAD_CASE(n) \
       
   838     case BT_LEAD ## n: \
       
   839       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
       
   840         *nextTokPtr = ptr; \
       
   841         return XML_TOK_DATA_CHARS; \
       
   842       } \
       
   843       ptr += n; \
       
   844       break;
       
   845     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
   846 #undef LEAD_CASE
       
   847     case BT_RSQB:
       
   848       if (ptr + MINBPC(enc) != end) {
       
   849          if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
       
   850            ptr += MINBPC(enc);
       
   851            break;
       
   852          }
       
   853          if (ptr + 2*MINBPC(enc) != end) {
       
   854            if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
       
   855              ptr += MINBPC(enc);
       
   856              break;
       
   857            }
       
   858            *nextTokPtr = ptr + 2*MINBPC(enc);
       
   859            return XML_TOK_INVALID;
       
   860          }
       
   861       }
       
   862       /* fall through */
       
   863     case BT_AMP:
       
   864     case BT_LT:
       
   865     case BT_NONXML:
       
   866     case BT_MALFORM:
       
   867     case BT_TRAIL:
       
   868     case BT_CR:
       
   869     case BT_LF:
       
   870       *nextTokPtr = ptr;
       
   871       return XML_TOK_DATA_CHARS;
       
   872     default:
       
   873       ptr += MINBPC(enc);
       
   874       break;
       
   875     }
       
   876   }
       
   877   *nextTokPtr = ptr;
       
   878   return XML_TOK_DATA_CHARS;
       
   879 }
       
   880 
       
   881 /* ptr points to character following "%" */
       
   882 
       
   883 static int PTRCALL
       
   884 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
       
   885                     const char **nextTokPtr)
       
   886 {
       
   887   if (ptr == end)
       
   888     return -XML_TOK_PERCENT;
       
   889   switch (BYTE_TYPE(enc, ptr)) {
       
   890   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   891   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
       
   892     *nextTokPtr = ptr;
       
   893     return XML_TOK_PERCENT;
       
   894   default:
       
   895     *nextTokPtr = ptr;
       
   896     return XML_TOK_INVALID;
       
   897   }
       
   898   while (ptr != end) {
       
   899     switch (BYTE_TYPE(enc, ptr)) {
       
   900     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   901     case BT_SEMI:
       
   902       *nextTokPtr = ptr + MINBPC(enc);
       
   903       return XML_TOK_PARAM_ENTITY_REF;
       
   904     default:
       
   905       *nextTokPtr = ptr;
       
   906       return XML_TOK_INVALID;
       
   907     }
       
   908   }
       
   909   return XML_TOK_PARTIAL;
       
   910 }
       
   911 
       
   912 static int PTRCALL
       
   913 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
       
   914                       const char **nextTokPtr)
       
   915 {
       
   916   if (ptr == end)
       
   917     return XML_TOK_PARTIAL;
       
   918   switch (BYTE_TYPE(enc, ptr)) {
       
   919   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
       
   920   default:
       
   921     *nextTokPtr = ptr;
       
   922     return XML_TOK_INVALID;
       
   923   }
       
   924   while (ptr != end) {
       
   925     switch (BYTE_TYPE(enc, ptr)) {
       
   926     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
   927     case BT_CR: case BT_LF: case BT_S:
       
   928     case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
       
   929       *nextTokPtr = ptr;
       
   930       return XML_TOK_POUND_NAME;
       
   931     default:
       
   932       *nextTokPtr = ptr;
       
   933       return XML_TOK_INVALID;
       
   934     }
       
   935   }
       
   936   return -XML_TOK_POUND_NAME;
       
   937 }
       
   938 
       
   939 static int PTRCALL
       
   940 PREFIX(scanLit)(int open, const ENCODING *enc,
       
   941                 const char *ptr, const char *end,
       
   942                 const char **nextTokPtr)
       
   943 {
       
   944   while (ptr != end) {
       
   945     int t = BYTE_TYPE(enc, ptr);
       
   946     switch (t) {
       
   947     INVALID_CASES(ptr, nextTokPtr)
       
   948     case BT_QUOT:
       
   949     case BT_APOS:
       
   950       ptr += MINBPC(enc);
       
   951       if (t != open)
       
   952         break;
       
   953       if (ptr == end)
       
   954         return -XML_TOK_LITERAL;
       
   955       *nextTokPtr = ptr;
       
   956       switch (BYTE_TYPE(enc, ptr)) {
       
   957       case BT_S: case BT_CR: case BT_LF:
       
   958       case BT_GT: case BT_PERCNT: case BT_LSQB:
       
   959         return XML_TOK_LITERAL;
       
   960       default:
       
   961         return XML_TOK_INVALID;
       
   962       }
       
   963     default:
       
   964       ptr += MINBPC(enc);
       
   965       break;
       
   966     }
       
   967   }
       
   968   return XML_TOK_PARTIAL;
       
   969 }
       
   970 
       
   971 static int PTRCALL
       
   972 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
       
   973                   const char **nextTokPtr)
       
   974 {
       
   975   int tok;
       
   976   if (ptr == end)
       
   977     return XML_TOK_NONE;
       
   978   if (MINBPC(enc) > 1) {
       
   979     size_t n = end - ptr;
       
   980     if (n & (MINBPC(enc) - 1)) {
       
   981       n &= ~(MINBPC(enc) - 1);
       
   982       if (n == 0)
       
   983         return XML_TOK_PARTIAL;
       
   984       end = ptr + n;
       
   985     }
       
   986   }
       
   987   switch (BYTE_TYPE(enc, ptr)) {
       
   988   case BT_QUOT:
       
   989     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   990   case BT_APOS:
       
   991     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
       
   992   case BT_LT:
       
   993     {
       
   994       ptr += MINBPC(enc);
       
   995       if (ptr == end)
       
   996         return XML_TOK_PARTIAL;
       
   997       switch (BYTE_TYPE(enc, ptr)) {
       
   998       case BT_EXCL:
       
   999         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1000       case BT_QUEST:
       
  1001         return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1002       case BT_NMSTRT:
       
  1003       case BT_HEX:
       
  1004       case BT_NONASCII:
       
  1005       case BT_LEAD2:
       
  1006       case BT_LEAD3:
       
  1007       case BT_LEAD4:
       
  1008         *nextTokPtr = ptr - MINBPC(enc);
       
  1009         return XML_TOK_INSTANCE_START;
       
  1010       }
       
  1011       *nextTokPtr = ptr;
       
  1012       return XML_TOK_INVALID;
       
  1013     }
       
  1014   case BT_CR:
       
  1015     if (ptr + MINBPC(enc) == end) {
       
  1016       *nextTokPtr = end;
       
  1017       /* indicate that this might be part of a CR/LF pair */
       
  1018       return -XML_TOK_PROLOG_S;
       
  1019     }
       
  1020     /* fall through */
       
  1021   case BT_S: case BT_LF:
       
  1022     for (;;) {
       
  1023       ptr += MINBPC(enc);
       
  1024       if (ptr == end)
       
  1025         break;
       
  1026       switch (BYTE_TYPE(enc, ptr)) {
       
  1027       case BT_S: case BT_LF:
       
  1028         break;
       
  1029       case BT_CR:
       
  1030         /* don't split CR/LF pair */
       
  1031         if (ptr + MINBPC(enc) != end)
       
  1032           break;
       
  1033         /* fall through */
       
  1034       default:
       
  1035         *nextTokPtr = ptr;
       
  1036         return XML_TOK_PROLOG_S;
       
  1037       }
       
  1038     }
       
  1039     *nextTokPtr = ptr;
       
  1040     return XML_TOK_PROLOG_S;
       
  1041   case BT_PERCNT:
       
  1042     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1043   case BT_COMMA:
       
  1044     *nextTokPtr = ptr + MINBPC(enc);
       
  1045     return XML_TOK_COMMA;
       
  1046   case BT_LSQB:
       
  1047     *nextTokPtr = ptr + MINBPC(enc);
       
  1048     return XML_TOK_OPEN_BRACKET;
       
  1049   case BT_RSQB:
       
  1050     ptr += MINBPC(enc);
       
  1051     if (ptr == end)
       
  1052       return -XML_TOK_CLOSE_BRACKET;
       
  1053     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
       
  1054       if (ptr + MINBPC(enc) == end)
       
  1055         return XML_TOK_PARTIAL;
       
  1056       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
       
  1057         *nextTokPtr = ptr + 2*MINBPC(enc);
       
  1058         return XML_TOK_COND_SECT_CLOSE;
       
  1059       }
       
  1060     }
       
  1061     *nextTokPtr = ptr;
       
  1062     return XML_TOK_CLOSE_BRACKET;
       
  1063   case BT_LPAR:
       
  1064     *nextTokPtr = ptr + MINBPC(enc);
       
  1065     return XML_TOK_OPEN_PAREN;
       
  1066   case BT_RPAR:
       
  1067     ptr += MINBPC(enc);
       
  1068     if (ptr == end)
       
  1069       return -XML_TOK_CLOSE_PAREN;
       
  1070     switch (BYTE_TYPE(enc, ptr)) {
       
  1071     case BT_AST:
       
  1072       *nextTokPtr = ptr + MINBPC(enc);
       
  1073       return XML_TOK_CLOSE_PAREN_ASTERISK;
       
  1074     case BT_QUEST:
       
  1075       *nextTokPtr = ptr + MINBPC(enc);
       
  1076       return XML_TOK_CLOSE_PAREN_QUESTION;
       
  1077     case BT_PLUS:
       
  1078       *nextTokPtr = ptr + MINBPC(enc);
       
  1079       return XML_TOK_CLOSE_PAREN_PLUS;
       
  1080     case BT_CR: case BT_LF: case BT_S:
       
  1081     case BT_GT: case BT_COMMA: case BT_VERBAR:
       
  1082     case BT_RPAR:
       
  1083       *nextTokPtr = ptr;
       
  1084       return XML_TOK_CLOSE_PAREN;
       
  1085     }
       
  1086     *nextTokPtr = ptr;
       
  1087     return XML_TOK_INVALID;
       
  1088   case BT_VERBAR:
       
  1089     *nextTokPtr = ptr + MINBPC(enc);
       
  1090     return XML_TOK_OR;
       
  1091   case BT_GT:
       
  1092     *nextTokPtr = ptr + MINBPC(enc);
       
  1093     return XML_TOK_DECL_CLOSE;
       
  1094   case BT_NUM:
       
  1095     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1096 #define LEAD_CASE(n) \
       
  1097   case BT_LEAD ## n: \
       
  1098     if (end - ptr < n) \
       
  1099       return XML_TOK_PARTIAL_CHAR; \
       
  1100     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
       
  1101       ptr += n; \
       
  1102       tok = XML_TOK_NAME; \
       
  1103       break; \
       
  1104     } \
       
  1105     if (IS_NAME_CHAR(enc, ptr, n)) { \
       
  1106       ptr += n; \
       
  1107       tok = XML_TOK_NMTOKEN; \
       
  1108       break; \
       
  1109     } \
       
  1110     *nextTokPtr = ptr; \
       
  1111     return XML_TOK_INVALID;
       
  1112     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1113 #undef LEAD_CASE
       
  1114   case BT_NMSTRT:
       
  1115   case BT_HEX:
       
  1116     tok = XML_TOK_NAME;
       
  1117     ptr += MINBPC(enc);
       
  1118     break;
       
  1119   case BT_DIGIT:
       
  1120   case BT_NAME:
       
  1121   case BT_MINUS:
       
  1122 #ifdef XML_NS
       
  1123   case BT_COLON:
       
  1124 #endif
       
  1125     tok = XML_TOK_NMTOKEN;
       
  1126     ptr += MINBPC(enc);
       
  1127     break;
       
  1128   case BT_NONASCII:
       
  1129     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
       
  1130       ptr += MINBPC(enc);
       
  1131       tok = XML_TOK_NAME;
       
  1132       break;
       
  1133     }
       
  1134     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
       
  1135       ptr += MINBPC(enc);
       
  1136       tok = XML_TOK_NMTOKEN;
       
  1137       break;
       
  1138     }
       
  1139     /* fall through */
       
  1140   default:
       
  1141     *nextTokPtr = ptr;
       
  1142     return XML_TOK_INVALID;
       
  1143   }
       
  1144   while (ptr != end) {
       
  1145     switch (BYTE_TYPE(enc, ptr)) {
       
  1146     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
  1147     case BT_GT: case BT_RPAR: case BT_COMMA:
       
  1148     case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
       
  1149     case BT_S: case BT_CR: case BT_LF:
       
  1150       *nextTokPtr = ptr;
       
  1151       return tok;
       
  1152 #ifdef XML_NS
       
  1153     case BT_COLON:
       
  1154       ptr += MINBPC(enc);
       
  1155       switch (tok) {
       
  1156       case XML_TOK_NAME:
       
  1157         if (ptr == end)
       
  1158           return XML_TOK_PARTIAL;
       
  1159         tok = XML_TOK_PREFIXED_NAME;
       
  1160         switch (BYTE_TYPE(enc, ptr)) {
       
  1161         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
       
  1162         default:
       
  1163           tok = XML_TOK_NMTOKEN;
       
  1164           break;
       
  1165         }
       
  1166         break;
       
  1167       case XML_TOK_PREFIXED_NAME:
       
  1168         tok = XML_TOK_NMTOKEN;
       
  1169         break;
       
  1170       }
       
  1171       break;
       
  1172 #endif
       
  1173     case BT_PLUS:
       
  1174       if (tok == XML_TOK_NMTOKEN)  {
       
  1175         *nextTokPtr = ptr;
       
  1176         return XML_TOK_INVALID;
       
  1177       }
       
  1178       *nextTokPtr = ptr + MINBPC(enc);
       
  1179       return XML_TOK_NAME_PLUS;
       
  1180     case BT_AST:
       
  1181       if (tok == XML_TOK_NMTOKEN)  {
       
  1182         *nextTokPtr = ptr;
       
  1183         return XML_TOK_INVALID;
       
  1184       }
       
  1185       *nextTokPtr = ptr + MINBPC(enc);
       
  1186       return XML_TOK_NAME_ASTERISK;
       
  1187     case BT_QUEST:
       
  1188       if (tok == XML_TOK_NMTOKEN)  {
       
  1189         *nextTokPtr = ptr;
       
  1190         return XML_TOK_INVALID;
       
  1191       }
       
  1192       *nextTokPtr = ptr + MINBPC(enc);
       
  1193       return XML_TOK_NAME_QUESTION;
       
  1194     default:
       
  1195       *nextTokPtr = ptr;
       
  1196       return XML_TOK_INVALID;
       
  1197     }
       
  1198   }
       
  1199   return -tok;
       
  1200 }
       
  1201 
       
  1202 static int PTRCALL
       
  1203 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
       
  1204                           const char *end, const char **nextTokPtr)
       
  1205 {
       
  1206   const char *start;
       
  1207   if (ptr == end)
       
  1208     return XML_TOK_NONE;
       
  1209   start = ptr;
       
  1210   while (ptr != end) {
       
  1211     switch (BYTE_TYPE(enc, ptr)) {
       
  1212 #define LEAD_CASE(n) \
       
  1213     case BT_LEAD ## n: ptr += n; break;
       
  1214     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1215 #undef LEAD_CASE
       
  1216     case BT_AMP:
       
  1217       if (ptr == start)
       
  1218         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1219       *nextTokPtr = ptr;
       
  1220       return XML_TOK_DATA_CHARS;
       
  1221     case BT_LT:
       
  1222       /* this is for inside entity references */
       
  1223       *nextTokPtr = ptr;
       
  1224       return XML_TOK_INVALID;
       
  1225     case BT_LF:
       
  1226       if (ptr == start) {
       
  1227         *nextTokPtr = ptr + MINBPC(enc);
       
  1228         return XML_TOK_DATA_NEWLINE;
       
  1229       }
       
  1230       *nextTokPtr = ptr;
       
  1231       return XML_TOK_DATA_CHARS;
       
  1232     case BT_CR:
       
  1233       if (ptr == start) {
       
  1234         ptr += MINBPC(enc);
       
  1235         if (ptr == end)
       
  1236           return XML_TOK_TRAILING_CR;
       
  1237         if (BYTE_TYPE(enc, ptr) == BT_LF)
       
  1238           ptr += MINBPC(enc);
       
  1239         *nextTokPtr = ptr;
       
  1240         return XML_TOK_DATA_NEWLINE;
       
  1241       }
       
  1242       *nextTokPtr = ptr;
       
  1243       return XML_TOK_DATA_CHARS;
       
  1244     case BT_S:
       
  1245       if (ptr == start) {
       
  1246         *nextTokPtr = ptr + MINBPC(enc);
       
  1247         return XML_TOK_ATTRIBUTE_VALUE_S;
       
  1248       }
       
  1249       *nextTokPtr = ptr;
       
  1250       return XML_TOK_DATA_CHARS;
       
  1251     default:
       
  1252       ptr += MINBPC(enc);
       
  1253       break;
       
  1254     }
       
  1255   }
       
  1256   *nextTokPtr = ptr;
       
  1257   return XML_TOK_DATA_CHARS;
       
  1258 }
       
  1259 
       
  1260 static int PTRCALL
       
  1261 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
       
  1262                        const char *end, const char **nextTokPtr)
       
  1263 {
       
  1264   const char *start;
       
  1265   if (ptr == end)
       
  1266     return XML_TOK_NONE;
       
  1267   start = ptr;
       
  1268   while (ptr != end) {
       
  1269     switch (BYTE_TYPE(enc, ptr)) {
       
  1270 #define LEAD_CASE(n) \
       
  1271     case BT_LEAD ## n: ptr += n; break;
       
  1272     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1273 #undef LEAD_CASE
       
  1274     case BT_AMP:
       
  1275       if (ptr == start)
       
  1276         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
       
  1277       *nextTokPtr = ptr;
       
  1278       return XML_TOK_DATA_CHARS;
       
  1279     case BT_PERCNT:
       
  1280       if (ptr == start) {
       
  1281         int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
       
  1282                                        end, nextTokPtr);
       
  1283         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
       
  1284       }
       
  1285       *nextTokPtr = ptr;
       
  1286       return XML_TOK_DATA_CHARS;
       
  1287     case BT_LF:
       
  1288       if (ptr == start) {
       
  1289         *nextTokPtr = ptr + MINBPC(enc);
       
  1290         return XML_TOK_DATA_NEWLINE;
       
  1291       }
       
  1292       *nextTokPtr = ptr;
       
  1293       return XML_TOK_DATA_CHARS;
       
  1294     case BT_CR:
       
  1295       if (ptr == start) {
       
  1296         ptr += MINBPC(enc);
       
  1297         if (ptr == end)
       
  1298           return XML_TOK_TRAILING_CR;
       
  1299         if (BYTE_TYPE(enc, ptr) == BT_LF)
       
  1300           ptr += MINBPC(enc);
       
  1301         *nextTokPtr = ptr;
       
  1302         return XML_TOK_DATA_NEWLINE;
       
  1303       }
       
  1304       *nextTokPtr = ptr;
       
  1305       return XML_TOK_DATA_CHARS;
       
  1306     default:
       
  1307       ptr += MINBPC(enc);
       
  1308       break;
       
  1309     }
       
  1310   }
       
  1311   *nextTokPtr = ptr;
       
  1312   return XML_TOK_DATA_CHARS;
       
  1313 }
       
  1314 
       
  1315 #ifdef XML_DTD
       
  1316 
       
  1317 static int PTRCALL
       
  1318 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
       
  1319                          const char *end, const char **nextTokPtr)
       
  1320 {
       
  1321   int level = 0;
       
  1322   if (MINBPC(enc) > 1) {
       
  1323     size_t n = end - ptr;
       
  1324     if (n & (MINBPC(enc) - 1)) {
       
  1325       n &= ~(MINBPC(enc) - 1);
       
  1326       end = ptr + n;
       
  1327     }
       
  1328   }
       
  1329   while (ptr != end) {
       
  1330     switch (BYTE_TYPE(enc, ptr)) {
       
  1331     INVALID_CASES(ptr, nextTokPtr)
       
  1332     case BT_LT:
       
  1333       if ((ptr += MINBPC(enc)) == end)
       
  1334         return XML_TOK_PARTIAL;
       
  1335       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
       
  1336         if ((ptr += MINBPC(enc)) == end)
       
  1337           return XML_TOK_PARTIAL;
       
  1338         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
       
  1339           ++level;
       
  1340           ptr += MINBPC(enc);
       
  1341         }
       
  1342       }
       
  1343       break;
       
  1344     case BT_RSQB:
       
  1345       if ((ptr += MINBPC(enc)) == end)
       
  1346         return XML_TOK_PARTIAL;
       
  1347       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
       
  1348         if ((ptr += MINBPC(enc)) == end)
       
  1349           return XML_TOK_PARTIAL;
       
  1350         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
       
  1351           ptr += MINBPC(enc);
       
  1352           if (level == 0) {
       
  1353             *nextTokPtr = ptr;
       
  1354             return XML_TOK_IGNORE_SECT;
       
  1355           }
       
  1356           --level;
       
  1357         }
       
  1358       }
       
  1359       break;
       
  1360     default:
       
  1361       ptr += MINBPC(enc);
       
  1362       break;
       
  1363     }
       
  1364   }
       
  1365   return XML_TOK_PARTIAL;
       
  1366 }
       
  1367 
       
  1368 #endif /* XML_DTD */
       
  1369 
       
  1370 static int PTRCALL
       
  1371 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
       
  1372                    const char **badPtr)
       
  1373 {
       
  1374   ptr += MINBPC(enc);
       
  1375   end -= MINBPC(enc);
       
  1376   for (; ptr != end; ptr += MINBPC(enc)) {
       
  1377     switch (BYTE_TYPE(enc, ptr)) {
       
  1378     case BT_DIGIT:
       
  1379     case BT_HEX:
       
  1380     case BT_MINUS:
       
  1381     case BT_APOS:
       
  1382     case BT_LPAR:
       
  1383     case BT_RPAR:
       
  1384     case BT_PLUS:
       
  1385     case BT_COMMA:
       
  1386     case BT_SOL:
       
  1387     case BT_EQUALS:
       
  1388     case BT_QUEST:
       
  1389     case BT_CR:
       
  1390     case BT_LF:
       
  1391     case BT_SEMI:
       
  1392     case BT_EXCL:
       
  1393     case BT_AST:
       
  1394     case BT_PERCNT:
       
  1395     case BT_NUM:
       
  1396 #ifdef XML_NS
       
  1397     case BT_COLON:
       
  1398 #endif
       
  1399       break;
       
  1400     case BT_S:
       
  1401       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
       
  1402         *badPtr = ptr;
       
  1403         return 0;
       
  1404       }
       
  1405       break;
       
  1406     case BT_NAME:
       
  1407     case BT_NMSTRT:
       
  1408       if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
       
  1409         break;
       
  1410     default:
       
  1411       switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1412       case 0x24: /* $ */
       
  1413       case 0x40: /* @ */
       
  1414         break;
       
  1415       default:
       
  1416         *badPtr = ptr;
       
  1417         return 0;
       
  1418       }
       
  1419       break;
       
  1420     }
       
  1421   }
       
  1422   return 1;
       
  1423 }
       
  1424 
       
  1425 /* This must only be called for a well-formed start-tag or empty
       
  1426    element tag.  Returns the number of attributes.  Pointers to the
       
  1427    first attsMax attributes are stored in atts.
       
  1428 */
       
  1429 
       
  1430 static int PTRCALL
       
  1431 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
       
  1432                 int attsMax, ATTRIBUTE *atts)
       
  1433 {
       
  1434   enum { other, inName, inValue } state = inName;
       
  1435   int nAtts = 0;
       
  1436   int open = 0; /* defined when state == inValue;
       
  1437                    initialization just to shut up compilers */
       
  1438 
       
  1439   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
       
  1440     switch (BYTE_TYPE(enc, ptr)) {
       
  1441 #define START_NAME \
       
  1442       if (state == other) { \
       
  1443         if (nAtts < attsMax) { \
       
  1444           atts[nAtts].name = ptr; \
       
  1445           atts[nAtts].normalized = 1; \
       
  1446         } \
       
  1447         state = inName; \
       
  1448       }
       
  1449 #define LEAD_CASE(n) \
       
  1450     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
       
  1451     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1452 #undef LEAD_CASE
       
  1453     case BT_NONASCII:
       
  1454     case BT_NMSTRT:
       
  1455     case BT_HEX:
       
  1456       START_NAME
       
  1457       break;
       
  1458 #undef START_NAME
       
  1459     case BT_QUOT:
       
  1460       if (state != inValue) {
       
  1461         if (nAtts < attsMax)
       
  1462           atts[nAtts].valuePtr = ptr + MINBPC(enc);
       
  1463         state = inValue;
       
  1464         open = BT_QUOT;
       
  1465       }
       
  1466       else if (open == BT_QUOT) {
       
  1467         state = other;
       
  1468         if (nAtts < attsMax)
       
  1469           atts[nAtts].valueEnd = ptr;
       
  1470         nAtts++;
       
  1471       }
       
  1472       break;
       
  1473     case BT_APOS:
       
  1474       if (state != inValue) {
       
  1475         if (nAtts < attsMax)
       
  1476           atts[nAtts].valuePtr = ptr + MINBPC(enc);
       
  1477         state = inValue;
       
  1478         open = BT_APOS;
       
  1479       }
       
  1480       else if (open == BT_APOS) {
       
  1481         state = other;
       
  1482         if (nAtts < attsMax)
       
  1483           atts[nAtts].valueEnd = ptr;
       
  1484         nAtts++;
       
  1485       }
       
  1486       break;
       
  1487     case BT_AMP:
       
  1488       if (nAtts < attsMax)
       
  1489         atts[nAtts].normalized = 0;
       
  1490       break;
       
  1491     case BT_S:
       
  1492       if (state == inName)
       
  1493         state = other;
       
  1494       else if (state == inValue
       
  1495                && nAtts < attsMax
       
  1496                && atts[nAtts].normalized
       
  1497                && (ptr == atts[nAtts].valuePtr
       
  1498                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
       
  1499                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
       
  1500                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
       
  1501         atts[nAtts].normalized = 0;
       
  1502       break;
       
  1503     case BT_CR: case BT_LF:
       
  1504       /* This case ensures that the first attribute name is counted
       
  1505          Apart from that we could just change state on the quote. */
       
  1506       if (state == inName)
       
  1507         state = other;
       
  1508       else if (state == inValue && nAtts < attsMax)
       
  1509         atts[nAtts].normalized = 0;
       
  1510       break;
       
  1511     case BT_GT:
       
  1512     case BT_SOL:
       
  1513       if (state != inValue)
       
  1514         return nAtts;
       
  1515       break;
       
  1516     default:
       
  1517       break;
       
  1518     }
       
  1519   }
       
  1520   /* not reached */
       
  1521 }
       
  1522 
       
  1523 static int PTRFASTCALL
       
  1524 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
       
  1525 {
       
  1526   int result = 0;
       
  1527   /* skip &# */
       
  1528   ptr += 2*MINBPC(enc);
       
  1529   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
       
  1530     for (ptr += MINBPC(enc);
       
  1531          !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
       
  1532          ptr += MINBPC(enc)) {
       
  1533       int c = BYTE_TO_ASCII(enc, ptr);
       
  1534       switch (c) {
       
  1535       case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
       
  1536       case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
       
  1537         result <<= 4;
       
  1538         result |= (c - ASCII_0);
       
  1539         break;
       
  1540       case ASCII_A: case ASCII_B: case ASCII_C:
       
  1541       case ASCII_D: case ASCII_E: case ASCII_F:
       
  1542         result <<= 4;
       
  1543         result += 10 + (c - ASCII_A);
       
  1544         break;
       
  1545       case ASCII_a: case ASCII_b: case ASCII_c:
       
  1546       case ASCII_d: case ASCII_e: case ASCII_f:
       
  1547         result <<= 4;
       
  1548         result += 10 + (c - ASCII_a);
       
  1549         break;
       
  1550       }
       
  1551       if (result >= 0x110000)
       
  1552         return -1;
       
  1553     }
       
  1554   }
       
  1555   else {
       
  1556     for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
       
  1557       int c = BYTE_TO_ASCII(enc, ptr);
       
  1558       result *= 10;
       
  1559       result += (c - ASCII_0);
       
  1560       if (result >= 0x110000)
       
  1561         return -1;
       
  1562     }
       
  1563   }
       
  1564   return checkCharRefNumber(result);
       
  1565 }
       
  1566 
       
  1567 static int PTRCALL
       
  1568 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
       
  1569                              const char *end)
       
  1570 {
       
  1571   switch ((end - ptr)/MINBPC(enc)) {
       
  1572   case 2:
       
  1573     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
       
  1574       switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1575       case ASCII_l:
       
  1576         return ASCII_LT;
       
  1577       case ASCII_g:
       
  1578         return ASCII_GT;
       
  1579       }
       
  1580     }
       
  1581     break;
       
  1582   case 3:
       
  1583     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
       
  1584       ptr += MINBPC(enc);
       
  1585       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
       
  1586         ptr += MINBPC(enc);
       
  1587         if (CHAR_MATCHES(enc, ptr, ASCII_p))
       
  1588           return ASCII_AMP;
       
  1589       }
       
  1590     }
       
  1591     break;
       
  1592   case 4:
       
  1593     switch (BYTE_TO_ASCII(enc, ptr)) {
       
  1594     case ASCII_q:
       
  1595       ptr += MINBPC(enc);
       
  1596       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
       
  1597         ptr += MINBPC(enc);
       
  1598         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
       
  1599           ptr += MINBPC(enc);
       
  1600           if (CHAR_MATCHES(enc, ptr, ASCII_t))
       
  1601             return ASCII_QUOT;
       
  1602         }
       
  1603       }
       
  1604       break;
       
  1605     case ASCII_a:
       
  1606       ptr += MINBPC(enc);
       
  1607       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
       
  1608         ptr += MINBPC(enc);
       
  1609         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
       
  1610           ptr += MINBPC(enc);
       
  1611           if (CHAR_MATCHES(enc, ptr, ASCII_s))
       
  1612             return ASCII_APOS;
       
  1613         }
       
  1614       }
       
  1615       break;
       
  1616     }
       
  1617   }
       
  1618   return 0;
       
  1619 }
       
  1620 
       
  1621 static int PTRCALL
       
  1622 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
       
  1623 {
       
  1624   for (;;) {
       
  1625     switch (BYTE_TYPE(enc, ptr1)) {
       
  1626 #define LEAD_CASE(n) \
       
  1627     case BT_LEAD ## n: \
       
  1628       if (*ptr1++ != *ptr2++) \
       
  1629         return 0;
       
  1630     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
       
  1631 #undef LEAD_CASE
       
  1632       /* fall through */
       
  1633       if (*ptr1++ != *ptr2++)
       
  1634         return 0;
       
  1635       break;
       
  1636     case BT_NONASCII:
       
  1637     case BT_NMSTRT:
       
  1638 #ifdef XML_NS
       
  1639     case BT_COLON:
       
  1640 #endif
       
  1641     case BT_HEX:
       
  1642     case BT_DIGIT:
       
  1643     case BT_NAME:
       
  1644     case BT_MINUS:
       
  1645       if (*ptr2++ != *ptr1++)
       
  1646         return 0;
       
  1647       if (MINBPC(enc) > 1) {
       
  1648         if (*ptr2++ != *ptr1++)
       
  1649           return 0;
       
  1650         if (MINBPC(enc) > 2) {
       
  1651           if (*ptr2++ != *ptr1++)
       
  1652             return 0;
       
  1653           if (MINBPC(enc) > 3) {
       
  1654             if (*ptr2++ != *ptr1++)
       
  1655               return 0;
       
  1656           }
       
  1657         }
       
  1658       }
       
  1659       break;
       
  1660     default:
       
  1661       if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
       
  1662         return 1;
       
  1663       switch (BYTE_TYPE(enc, ptr2)) {
       
  1664       case BT_LEAD2:
       
  1665       case BT_LEAD3:
       
  1666       case BT_LEAD4:
       
  1667       case BT_NONASCII:
       
  1668       case BT_NMSTRT:
       
  1669 #ifdef XML_NS
       
  1670       case BT_COLON:
       
  1671 #endif
       
  1672       case BT_HEX:
       
  1673       case BT_DIGIT:
       
  1674       case BT_NAME:
       
  1675       case BT_MINUS:
       
  1676         return 0;
       
  1677       default:
       
  1678         return 1;
       
  1679       }
       
  1680     }
       
  1681   }
       
  1682   /* not reached */
       
  1683 }
       
  1684 
       
  1685 static int PTRCALL
       
  1686 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
       
  1687                          const char *end1, const char *ptr2)
       
  1688 {
       
  1689   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
       
  1690     if (ptr1 == end1)
       
  1691       return 0;
       
  1692     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
       
  1693       return 0;
       
  1694   }
       
  1695   return ptr1 == end1;
       
  1696 }
       
  1697 
       
  1698 static int PTRFASTCALL
       
  1699 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
       
  1700 {
       
  1701   const char *start = ptr;
       
  1702   for (;;) {
       
  1703     switch (BYTE_TYPE(enc, ptr)) {
       
  1704 #define LEAD_CASE(n) \
       
  1705     case BT_LEAD ## n: ptr += n; break;
       
  1706     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1707 #undef LEAD_CASE
       
  1708     case BT_NONASCII:
       
  1709     case BT_NMSTRT:
       
  1710 #ifdef XML_NS
       
  1711     case BT_COLON:
       
  1712 #endif
       
  1713     case BT_HEX:
       
  1714     case BT_DIGIT:
       
  1715     case BT_NAME:
       
  1716     case BT_MINUS:
       
  1717       ptr += MINBPC(enc);
       
  1718       break;
       
  1719     default:
       
  1720       return (int)(ptr - start);
       
  1721     }
       
  1722   }
       
  1723 }
       
  1724 
       
  1725 static const char * PTRFASTCALL
       
  1726 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
       
  1727 {
       
  1728   for (;;) {
       
  1729     switch (BYTE_TYPE(enc, ptr)) {
       
  1730     case BT_LF:
       
  1731     case BT_CR:
       
  1732     case BT_S:
       
  1733       ptr += MINBPC(enc);
       
  1734       break;
       
  1735     default:
       
  1736       return ptr;
       
  1737     }
       
  1738   }
       
  1739 }
       
  1740 
       
  1741 static void PTRCALL
       
  1742 PREFIX(updatePosition)(const ENCODING *enc,
       
  1743                        const char *ptr,
       
  1744                        const char *end,
       
  1745                        POSITION *pos)
       
  1746 {
       
  1747   while (ptr != end) {
       
  1748     switch (BYTE_TYPE(enc, ptr)) {
       
  1749 #define LEAD_CASE(n) \
       
  1750     case BT_LEAD ## n: \
       
  1751       ptr += n; \
       
  1752       break;
       
  1753     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
       
  1754 #undef LEAD_CASE
       
  1755     case BT_LF:
       
  1756       pos->columnNumber = (XML_Size)-1;
       
  1757       pos->lineNumber++;
       
  1758       ptr += MINBPC(enc);
       
  1759       break;
       
  1760     case BT_CR:
       
  1761       pos->lineNumber++;
       
  1762       ptr += MINBPC(enc);
       
  1763       if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
       
  1764         ptr += MINBPC(enc);
       
  1765       pos->columnNumber = (XML_Size)-1;
       
  1766       break;
       
  1767     default:
       
  1768       ptr += MINBPC(enc);
       
  1769       break;
       
  1770     }
       
  1771     pos->columnNumber++;
       
  1772   }
       
  1773 }
       
  1774 
       
  1775 #undef DO_LEAD_CASE
       
  1776 #undef MULTIBYTE_CASES
       
  1777 #undef INVALID_CASES
       
  1778 #undef CHECK_NAME_CASE
       
  1779 #undef CHECK_NAME_CASES
       
  1780 #undef CHECK_NMSTRT_CASE
       
  1781 #undef CHECK_NMSTRT_CASES
       
  1782 
       
  1783 #endif /* XML_TOK_IMPL_C */