core/expat-2.0.1/xmlwf/ct.c
changeset 300 b6d834208d33
parent 296 ea3dbc023c80
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/expat-2.0.1/xmlwf/ct.c	Tue Feb 02 21:49:24 2016 +0000
@@ -0,0 +1,147 @@
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
+
/*
 * Return the next token of a MIME-style header value (for example the
 * value of a Content-Type header).
 *
 * *pp is the scan cursor and is advanced as input is consumed.  A token is
 * one of:
 *   - a special character ';', '/' or '=': the result points at it and the
 *     cursor is left just past it;
 *   - a quoted string: the result points at the opening '"' and the cursor
 *     is left just past the closing '"';
 *   - an atom, i.e. any other run of characters: the result points at its
 *     first character and the cursor is left on the character that ended
 *     it (white space, '(', a special, '"' or the terminating NUL).
 * White space between tokens and parenthesised comments, which may nest,
 * are skipped.
 *
 * Returns a pointer to the first character of the token, or 0 when no
 * token is available: end of input (also inside an unterminated string or
 * comment), a backslash directly before the NUL, or a ')' met outside any
 * comment or string.
 */
static const char *
getTok(const char **pp)
{
  /* The order is significant: every value above `init` is a comment
     nesting depth (inComment is depth 1), which lets '(' use state++ and
     ')' use --state. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = 0;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may run right up to the end of the input; return it
         instead of dropping it.  The cursor stays on the NUL, so the next
         call reports end of input. */
      if (state == inAtom)
        return tokStart;
      return 0;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      /* White space ends an atom and is otherwise skipped. */
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Open a comment, or go one level deeper in an existing one. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;                /* close one comment level */
      else if (state != inString)
        return 0;               /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      /* Outside strings and comments a special is a token on its own. */
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Consume the backslash together with the character after it; the
         pair never acts as a delimiter and does not change the state. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;                  /* step past the closing quote */
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote is ordinary text. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}
+
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring ASCII case in the token.
 *
 * key must be lowercase ASCII.  Only key characters in 'a'..'z' also match
 * their uppercase form; every other key character must match exactly.
 *
 * Returns 1 when the token is exactly key (same length), otherwise 0.
 * A null start (no token) never matches.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;

    /* The token is longer than the key.  Stop here: without this check a
       token byte equal to 'A' - 'a' (0xE0 where char is signed) would
       compare equal to the folded terminator and the scan would run past
       the end of key. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Fold case for letters only, so that e.g. '-' in a key cannot match
       the unrelated character 32 positions below it. */
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  /* The token is exhausted; it matches only if the key is exhausted too. */
  return *key == '\0';
}
+
+void
+getXMLCharset(const char *buf, char *charset)
+{
+  const char *next, *p;
+
+  charset[0] = '\0';
+  next = buf;
+  p = getTok(&next);
+  if (matchkey(p, next, "text"))
+    strcpy(charset, "us-ascii");
+  else if (!matchkey(p, next, "application"))
+    return;
+  p = getTok(&next);
+  if (!p || *p != '/')
+    return;
+  p = getTok(&next);
+  if (matchkey(p, next, "xml"))
+    isXml = 1;
+  p = getTok(&next);
+  while (p) {
+    if (*p == ';') {
+      p = getTok(&next);
+      if (matchkey(p, next, "charset")) {
+        p = getTok(&next);
+        if (p && *p == '=') {
+          p = getTok(&next);
+          if (p) {
+            char *s = charset;
+            if (*p == '"') {
+              while (++p != next - 1) {
+                if (*p == '\\')
+                  ++p;
+                if (s == charset + CHARSET_MAX - 1) {
+                  charset[0] = '\0';
+                  break;
+                }
+                *s++ = *p;
+              }
+              *s++ = '\0';
+            }
+            else {
+              if (next - p > CHARSET_MAX - 1)
+                break;
+              while (p != next)
+                *s++ = *p++;
+              *s = 0;
+              break;
+            }
+          }
+        }
+      }
+    }
+  else
+    p = getTok(&next);
+  }
+}
+
+int
+main(int argc, char **argv)
+{
+  char buf[CHARSET_MAX];
+  getXMLCharset(argv[1], buf);
+  printf("charset = \"%s\"\n", buf);
+  return 0;
+}