--- a/CharacterEncoderImplementations__ISO10646_to_UTF8.st Thu Jul 05 11:45:05 2012 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_UTF8.st Wed Jul 18 17:55:48 2012 +0100
@@ -90,7 +90,8 @@
byte isNil ifTrue:[^ errorReporter value:'short utf8 string'].
ascii := (ascii bitShift:6) bitOr:(byte bitAnd:2r00111111).
(byte bitAnd:2r11000000) ~~ 2r10000000 ifTrue:[
- ^ errorReporter value:'illegal followbyte (next)'.].
+ ^ errorReporter value:'illegal followbyte (next)'.
+ ].
].
last6Bits := [
@@ -106,13 +107,16 @@
].
ascii := a.
(byte bitAnd:2r11000000) ~~ 2r10000000 ifTrue:[
- ^ errorReporter value:'illegal followbyte (last)'.].
+ ^ errorReporter value:'illegal followbyte (last)'.
+ ].
].
nBitsRequired := 8.
anyAbove7BitAscii := false.
sz := 0.
s := aStringOrByteCollection readStream.
+
+ "first determine the string size"
[s atEnd] whileFalse:[
byte := ascii := s nextByte.
(byte bitAnd:16r80) ~~ 0 ifTrue:[
@@ -225,8 +229,10 @@
ascii := (ascii bitShift:6) bitOr:(byte bitAnd:2r00111111).
].
- s := aStringOrByteCollection readStream.
+ s reset.
idx := 1.
+
+ "now fill the string"
[s atEnd] whileFalse:[
byte := ascii := s nextByte.
(byte bitAnd:2r10000000) ~~ 0 ifTrue:[
@@ -304,7 +310,7 @@
|s|
"/ avoid creation of new strings
- aUnicodeString contains8BitCharacters ifFalse:[^ aUnicodeString].
+ aUnicodeString contains8BitCharacters ifFalse:[^ aUnicodeString asSingleByteString].
s := WriteStream on:(String uninitializedNew:aUnicodeString size).
aUnicodeString do:[:eachCharacter |
@@ -412,7 +418,7 @@
!
nameOfEncoding
- ^ #'utf8'
+ ^ #utf8
! !
!ISO10646_to_UTF8 methodsFor:'stream support'!
@@ -452,11 +458,11 @@
!ISO10646_to_UTF8 class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8.st,v 1.16 2009/09/22 09:08:09 fm Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_UTF8.st,v 1.17 2012/07/10 07:55:50 stefan Exp $'
!
version_SVN
- ^ '$Id: CharacterEncoderImplementations__ISO10646_to_UTF8.st 10807 2012-05-05 21:58:24Z vranyj1 $'
+ ^ '$Id: CharacterEncoderImplementations__ISO10646_to_UTF8.st 10824 2012-07-18 16:55:48Z vranyj1 $'
! !