*** empty log message ***
author Claus Gittinger <cg@exept.de>
Thu, 11 Mar 2004 16:39:08 +0100
changeset 1374 e351232c03e0
parent 1373 65473183582d
child 1375 80969e1428a8
*** empty log message ***
SourceCodeManagerUtilities.st
--- a/SourceCodeManagerUtilities.st	Thu Mar 11 10:55:56 2004 +0100
+++ b/SourceCodeManagerUtilities.st	Thu Mar 11 16:39:08 2004 +0100
@@ -1917,7 +1917,7 @@
             w notNil ifTrue:[
                 enc := withoutQuotes value.
                 (CharacterEncoder encoderFor:enc ifAbsent:nil) notNil ifTrue:[
-                    ^ enc
+                    ^ enc asSymbol
                 ].
                 enc size >=3 ifTrue:[
                     Transcript showCR:'Unknown encoding: ' , withoutQuotes value.
@@ -1928,6 +1928,67 @@
     ^ nil
 !
 
+guessEncodingOfFile:aFilename
+    "guess the character encoding of the text file named aFilename.
+     Reads the first (up to) 2048 bytes of the file and asks
+     guessEncodingOfBuffer: to look there for a string of the form
+        encoding #name
+     or:
+        encoding: name
+     If that is not found, the bytes are searched for the escape sequences
+     which CharacterEncoder provides for ISO-2022-JP and JIS7.
+     Returns the guessed encoding name,
+     or nil if the file cannot be opened or no encoding could be guessed."
+
+    |s buffer enc|
+
+    s := aFilename asFilename readStreamOrNil.
+    s isNil ifTrue:[^ nil].
+
+    buffer := String new:2048.
+    "/ make sure the stream is closed, even if reading fails
+    [
+        s nextBytes:(buffer size) into:buffer.
+    ] ensure:[
+        s close.
+    ].
+
+    enc := self guessEncodingOfBuffer:buffer.
+    enc notNil ifTrue:[^ enc].
+
+    "/ no encoding declaration found - look for ISO-2022-JP / JIS7 escape sequences
+    (buffer findString:(CharacterEncoder jisISO2022EscapeSequence)) ~~ 0 ifTrue:[
+        ^ #'iso2022-jp'
+    ].
+    (buffer findString:(CharacterEncoder jis7KanjiEscapeSequence)) ~~ 0 ifTrue:[
+        ^ #jis7
+    ].
+    (buffer findString:(CharacterEncoder jis7KanjiOldEscapeSequence)) ~~ 0 ifTrue:[
+        ^ #jis7
+    ].
+
+    "/ TODO:
+
+"/    "/ look for EUC
+"/    idx := aString findFirst:[:char | |ascii|
+"/                                        ((ascii := char asciiValue) >= 16rA1)
+"/                                        and:[ascii <= 16rFE]].
+"/    idx ~~ 0 ifTrue:[
+"/        ascii := (aString at:(idx + 1)) asciiValue.
+"/        (ascii >= 16rA1 and:[ascii <= 16rFE]) ifTrue:[
+"/            ^ #euc
+"/        ]
+"/    ].
+    "/ look for SJIS ...
+
+    ^ nil
+
+    "
+     SourceCodeManagerUtilities guessEncodingOfFile:'../../libview2/resources/ApplicationModel_de.rs' asFilename
+     SourceCodeManagerUtilities guessEncodingOfFile:'../../libview2/resources/ApplicationModel_ru.rs' asFilename
+    "
+!
+
 guessEncodingOfStream:aStream
     "look for a string of the form
             encoding #name
@@ -2176,5 +2237,5 @@
 !SourceCodeManagerUtilities class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic3/SourceCodeManagerUtilities.st,v 1.98 2004-03-09 21:41:42 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic3/SourceCodeManagerUtilities.st,v 1.99 2004-03-11 15:39:08 cg Exp $'
 ! !