#guessEncodingOfBuffer - do NOT handle encoding=utf8
author Stefan Vogel <sv@exept.de>
Sat, 28 Jul 2007 20:12:36 +0200
changeset 10672 b6230a13035b
parent 10671 90b197d23f1b
child 10673 05229646ecd7
#guessEncodingOfBuffer - do NOT handle encoding=utf8 (only encoding: #utf8).
CharacterEncoder.st
--- a/CharacterEncoder.st	Sat Jul 28 20:09:44 2007 +0200
+++ b/CharacterEncoder.st	Sat Jul 28 20:12:36 2007 +0200
@@ -1001,28 +1001,15 @@
      within the given buffer 
      (which is usually the first few bytes of a textFile)."
 
-    |withoutQuotes lcBuffer quote|
-
-    withoutQuotes := 
-        [:word |
-            |result|
+    |lcBuffer quote|
 
-            result := word.
-            ((result startsWith:$") or:[(result startsWith:$')]) ifTrue:[
-                result := result copyFrom:2
-            ].
-            ((result endsWith:$") or:[(result endsWith:$')]) ifTrue:[
-                result := result copyWithoutLast:1
-            ].
-            result
-        ].
+    (buffer startsWith:#[16rEF 16rBB 16rBF] asString) ifTrue:[
+        ^ #utf8
+    ].
 
     lcBuffer := buffer asLowercase.
-    (buffer startsWith:#[16rEF 16rBB 16rBF] asString) ifTrue:[
-        ^ 'utf-8'
-    ].
 
-    #( 'charset' 'encoding' ) do:[:keyWord |
+    #(charset encoding) do:[:keyWord |
         |encoderOrNil idx s w enc|
 
         (idx := lcBuffer findString:keyWord) ~~ 0 ifTrue:[
@@ -1031,7 +1018,10 @@
             s skip:keyWord size.
             s skipSeparators. 
 
-            ['=:#' includes:s peek] whileTrue:[
+            "do not include '=' here, otherwise
+             files containing xml code (<?xml charset='utf8'>) will be parsed as UTF-8"
+
+            [':#' includes:s peek] whileTrue:[
                 s next.
                 s skipSeparators. 
             ].
@@ -1043,10 +1033,9 @@
                 w := s upToSeparator.
             ].
             w notNil ifTrue:[
-                enc := withoutQuotes value:w.
-                encoderOrNil := self encoderFor:enc asSymbol ifAbsent:nil.
+                enc := w withoutQuotes asSymbol.
+                encoderOrNil := self encoderFor:enc ifAbsent:nil.
                 encoderOrNil notNil ifTrue:[
-                    "/ ^ enc asSymbol
                     ^ encoderOrNil nameOfEncoding
                 ].
 "/                enc size >=3 ifTrue:[
@@ -1523,7 +1512,7 @@
 !CharacterEncoder class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.96 2006-10-23 11:33:53 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoder.st,v 1.97 2007-07-28 18:12:36 stefan Exp $'
 ! !
 
 CharacterEncoder initialize!