--- a/CharacterEncoder.st Mon Feb 04 15:24:58 2019 +0100
+++ b/CharacterEncoder.st Tue Feb 05 11:39:59 2019 +0100
@@ -1200,42 +1200,50 @@
"check for Unicode Byte Order Marks (BOM)"
EncodingDetectors
add:[:buffer |
- |guess byte1 byte2|
-
- byte1 := (buffer at:1) codePoint.
- byte2 := (buffer at:2) codePoint.
- byte1 < 16rFE ifTrue:[
- (byte1 = 16rEF
- and:[byte2 = 16rBB
- and:[(buffer at:3) codePoint = 16rBF]]) ifTrue:[
- guess := #utf8
+ |nb guess byte1 byte2 byte3 byte4| "the block answers an encoding symbol for a recognized BOM, or nil"
+
+ (nb := buffer size) >= 2 ifTrue:[ "every BOM needs at least two bytes; shorter buffers leave guess nil"
+ byte1 := (buffer at:1) codePoint.
+ byte2 := (buffer at:2) codePoint.
+ nb > 2 ifTrue:[ "byte3/byte4 stay nil on short buffers, so the == tests below simply fail"
+ byte3 := (buffer at:3) codePoint.
+ nb > 3 ifTrue:[
+ byte4 := (buffer at:4) codePoint.
+ ].
+ ].
+ byte1 < 16rFE ifTrue:[
+ (byte1 == 16rEF "EF-BB-BF utf8 signature"
+ and:[byte2 == 16rBB
+ and:[byte3 == 16rBF]]) ifTrue:[
+ guess := #utf8
+ ] ifFalse:[
+ (byte1 == 0
+ and:[byte2 == 0
+ and:[byte3 == 16rFE
+ and:[byte4 == 16rFF]]]) ifTrue:[
+ "00-00-FE-FF big endian utf32"
+ guess := #utf32be
+ ].
+ ]
] ifFalse:[
- (byte1 = 0
- and:[byte2 = 0
- and:[(buffer at:3) codePoint = 16rFE
- and:[(buffer at:4) codePoint = 16rFF]]]) ifTrue:[
- "00-00-FE-FF big endian utf32"
- guess := #utf32be
- ].
- ]
- ] ifFalse:[
- byte1 = 16rFF ifTrue:[
- byte2 = 16rFE ifTrue:[
- "FF-FE little endian utf16 or utf32"
- ((buffer at:3) codePoint = 0 and:[(buffer at:4) codePoint = 0]) ifTrue:[
- "FF-FE-00-00 little endian utf32"
- guess := #utf32le.
- ] ifFalse:[
- guess := #utf16le
- ]
- ].
- ] ifFalse:["byte1 = 16rFE"
- "FE-FF big endian utf16"
- byte2 = 16rFF ifTrue:[
- "big endian"
- guess := #utf16be
- ].
- ]
+ byte1 == 16rFF ifTrue:[
+ byte2 == 16rFE ifTrue:[
+ "FF-FE little endian utf16 or utf32"
+ (byte3 == 0 and:[byte4 == 0]) ifTrue:[
+ "FF-FE-00-00 little endian utf32"
+ guess := #utf32le.
+ ] ifFalse:[
+ guess := #utf16le
+ ]
+ ].
+ ] ifFalse:["byte1 is 16rFE or above 16rFF here (wide code points); only 16rFE starts a BOM"
+ "FE-FF big endian utf16"
+ (byte1 == 16rFE and:[byte2 == 16rFF]) ifTrue:[
+ "big endian"
+ guess := #utf16be
+ ].
+ ]
+ ].
].
guess
].
@@ -1352,6 +1360,7 @@
"/ ].
"Modified: / 17-01-2018 / 15:55:36 / stefan"
+ "Modified: / 05-02-2019 / 09:23:37 / Claus Gittinger"
!
showCharacterSet