core/expat-2.0.1/xmlwf/ct.c
changeset 300 b6d834208d33
parent 296 ea3dbc023c80
equal deleted inserted replaced
299:f06c5eba524d 300:b6d834208d33
       
     1 #define CHARSET_MAX 41
       
     2 
       
     3 static const char *
       
     4 getTok(const char **pp)
       
     5 {
       
     6   enum { inAtom, inString, init, inComment };
       
     7   int state = init;
       
     8   const char *tokStart = 0;
       
     9   for (;;) {
       
    10     switch (**pp) {
       
    11     case '\0':
       
    12       return 0;
       
    13     case ' ':
       
    14     case '\r':
       
    15     case '\t':
       
    16     case '\n':
       
    17       if (state == inAtom)
       
    18         return tokStart;
       
    19       break;
       
    20     case '(':
       
    21       if (state == inAtom)
       
    22         return tokStart;
       
    23       if (state != inString)
       
    24         state++;
       
    25       break;
       
    26     case ')':
       
    27       if (state > init)
       
    28         --state;
       
    29       else if (state != inString)
       
    30         return 0;
       
    31       break;
       
    32     case ';':
       
    33     case '/':
       
    34     case '=':
       
    35       if (state == inAtom)
       
    36         return tokStart;
       
    37       if (state == init)
       
    38         return (*pp)++;
       
    39       break;
       
    40     case '\\':
       
    41       ++*pp;
       
    42       if (**pp == '\0')
       
    43         return 0;
       
    44       break;
       
    45     case '"':
       
    46       switch (state) {
       
    47       case inString:
       
    48         ++*pp;
       
    49         return tokStart;
       
    50       case inAtom:
       
    51         return tokStart;
       
    52       case init:
       
    53         tokStart = *pp;
       
    54         state = inString;
       
    55         break;
       
    56       }
       
    57       break;
       
    58     default:
       
    59       if (state == init) {
       
    60         tokStart = *pp;
       
    61         state = inAtom;
       
    62       }
       
    63       break;
       
    64     }
       
    65     ++*pp;
       
    66   }
       
    67   /* not reached */
       
    68 }
       
    69 
       
    70 /* key must be lowercase ASCII */
       
    71 
       
    72 static int
       
    73 matchkey(const char *start, const char *end, const char *key)
       
    74 {
       
    75   if (!start)
       
    76     return 0;
       
    77   for (; start != end; start++, key++)
       
    78     if (*start != *key && *start != 'A' + (*key - 'a'))
       
    79       return 0;
       
    80   return *key == '\0';
       
    81 }
       
    82 
       
    83 void
       
    84 getXMLCharset(const char *buf, char *charset)
       
    85 {
       
    86   const char *next, *p;
       
    87 
       
    88   charset[0] = '\0';
       
    89   next = buf;
       
    90   p = getTok(&next);
       
    91   if (matchkey(p, next, "text"))
       
    92     strcpy(charset, "us-ascii");
       
    93   else if (!matchkey(p, next, "application"))
       
    94     return;
       
    95   p = getTok(&next);
       
    96   if (!p || *p != '/')
       
    97     return;
       
    98   p = getTok(&next);
       
    99   if (matchkey(p, next, "xml"))
       
   100     isXml = 1;
       
   101   p = getTok(&next);
       
   102   while (p) {
       
   103     if (*p == ';') {
       
   104       p = getTok(&next);
       
   105       if (matchkey(p, next, "charset")) {
       
   106         p = getTok(&next);
       
   107         if (p && *p == '=') {
       
   108           p = getTok(&next);
       
   109           if (p) {
       
   110             char *s = charset;
       
   111             if (*p == '"') {
       
   112               while (++p != next - 1) {
       
   113                 if (*p == '\\')
       
   114                   ++p;
       
   115                 if (s == charset + CHARSET_MAX - 1) {
       
   116                   charset[0] = '\0';
       
   117                   break;
       
   118                 }
       
   119                 *s++ = *p;
       
   120               }
       
   121               *s++ = '\0';
       
   122             }
       
   123             else {
       
   124               if (next - p > CHARSET_MAX - 1)
       
   125                 break;
       
   126               while (p != next)
       
   127                 *s++ = *p++;
       
   128               *s = 0;
       
   129               break;
       
   130             }
       
   131           }
       
   132         }
       
   133       }
       
   134     }
       
   135   else
       
   136     p = getTok(&next);
       
   137   }
       
   138 }
       
   139 
       
   140 int
       
   141 main(int argc, char **argv)
       
   142 {
       
   143   char buf[CHARSET_MAX];
       
   144   getXMLCharset(argv[1], buf);
       
   145   printf("charset = \"%s\"\n", buf);
       
   146   return 0;
       
   147 }