172
|
1 |
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
|
|
2 |
|
|
3 |
/*
 * Scan the next token of a Content-Type style header value.
 *
 * On entry *pp points at the text still to be scanned.  Whitespace and
 * parenthesized comments (which may nest) are skipped.  A token is one of:
 *   - an atom: a run of characters ended by whitespace, '(', ';', '/',
 *     '=', '"' or the end of the string;
 *   - a quoted string: from the opening '"' through the closing '"';
 *   - a single ';', '/' or '=' character.
 *
 * Returns a pointer to the first character of the token and leaves *pp
 * just past its last character.  Returns 0 when no token is available:
 * end of input outside an atom (including an unterminated quoted string
 * or comment), a backslash directly followed by the end of the string, or
 * a ')' that is neither inside a comment nor inside a quoted string.
 */
static const char *
getTok(const char **pp)
{
  /*
   * The numeric order matters: the code below compares against init and
   * increments/decrements state.  inComment means one level of comment
   * nesting, inComment + 1 means two levels, and so on.
   */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;

  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may legitimately be ended by the end of the string. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open (or nest) a comment unless we are inside a quoted string. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* close one comment level */
      else if (state != inString)
        return 0; /* stray ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside strings and comments these are one-character tokens. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* The backslash and the character after it are both consumed
         without being interpreted. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp; /* include the closing quote in the token */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* inside a comment: the quote is ordinary comment text */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
|
|
69 |
|
|
70 |
/*
 * Compare the token [start, end) with key, ignoring ASCII letter case in
 * the token.  key must be lowercase ASCII and NUL-terminated.
 *
 * Returns 1 when the token has exactly the same length as key and every
 * character matches, 0 otherwise.  A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    /* Token is longer than key: stop here instead of stepping past the
       key's terminator and reading out of bounds. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  /* Token exhausted: it matches only if the key is exhausted too. */
  return *key == '\0';
}
|
|
82 |
|
|
83 |
void
|
|
84 |
getXMLCharset(const char *buf, char *charset)
|
|
85 |
{
|
|
86 |
const char *next, *p;
|
|
87 |
|
|
88 |
charset[0] = '\0';
|
|
89 |
next = buf;
|
|
90 |
p = getTok(&next);
|
|
91 |
if (matchkey(p, next, "text"))
|
|
92 |
strcpy(charset, "us-ascii");
|
|
93 |
else if (!matchkey(p, next, "application"))
|
|
94 |
return;
|
|
95 |
p = getTok(&next);
|
|
96 |
if (!p || *p != '/')
|
|
97 |
return;
|
|
98 |
p = getTok(&next);
|
|
99 |
if (matchkey(p, next, "xml"))
|
|
100 |
isXml = 1;
|
|
101 |
p = getTok(&next);
|
|
102 |
while (p) {
|
|
103 |
if (*p == ';') {
|
|
104 |
p = getTok(&next);
|
|
105 |
if (matchkey(p, next, "charset")) {
|
|
106 |
p = getTok(&next);
|
|
107 |
if (p && *p == '=') {
|
|
108 |
p = getTok(&next);
|
|
109 |
if (p) {
|
|
110 |
char *s = charset;
|
|
111 |
if (*p == '"') {
|
|
112 |
while (++p != next - 1) {
|
|
113 |
if (*p == '\\')
|
|
114 |
++p;
|
|
115 |
if (s == charset + CHARSET_MAX - 1) {
|
|
116 |
charset[0] = '\0';
|
|
117 |
break;
|
|
118 |
}
|
|
119 |
*s++ = *p;
|
|
120 |
}
|
|
121 |
*s++ = '\0';
|
|
122 |
}
|
|
123 |
else {
|
|
124 |
if (next - p > CHARSET_MAX - 1)
|
|
125 |
break;
|
|
126 |
while (p != next)
|
|
127 |
*s++ = *p++;
|
|
128 |
*s = 0;
|
|
129 |
break;
|
|
130 |
}
|
|
131 |
}
|
|
132 |
}
|
|
133 |
}
|
|
134 |
}
|
|
135 |
else
|
|
136 |
p = getTok(&next);
|
|
137 |
}
|
|
138 |
}
|
|
139 |
|
|
140 |
int
|
|
141 |
main(int argc, char **argv)
|
|
142 |
{
|
|
143 |
char buf[CHARSET_MAX];
|
|
144 |
getXMLCharset(argv[1], buf);
|
|
145 |
printf("charset = \"%s\"\n", buf);
|
|
146 |
return 0;
|
|
147 |
}
|