--- a/CharacterArray.st Tue Oct 22 20:00:37 2002 +0200
+++ b/CharacterArray.st Tue Oct 22 20:14:36 2002 +0200
@@ -157,7 +157,8 @@
"return a new string which represents the characters as decoded
from the utf8 encoded bytes, aByteCollection.
Returns either a normal String, or a TwoByteString instance.
- Only useful, when reading twoByteStrings from external sources."
+ Only useful, when reading twoByteStrings from external sources.
+ This only handles characters of up to 16 bits."
|sz nBitsRequired ascii s byte newString idx|
@@ -173,7 +174,10 @@
ascii := (ascii bitShift:6) bitOr:(byte bitAnd:16r3F).
ascii > 16rFF ifTrue:[
nBitsRequired := nBitsRequired max:16
- ]
+ ].
+ ascii <= 16r7F ifTrue:[
+ self error:'invalid utf encoding'.
+ ].
] ifFalse:[
(byte bitAnd:16rF0) == 16rE0 ifTrue:[
ascii := (byte bitAnd:16r0F).
@@ -183,9 +187,12 @@
ascii := (ascii bitShift:6) bitOr:(byte bitAnd:16r3F).
ascii > 16rFF ifTrue:[
nBitsRequired := nBitsRequired max:16
- ]
+ ].
+ ascii <= 16r7FF ifTrue:[
+ self error:'invalid utf encoding'.
+ ].
] ifFalse:[
- self error:'bad utf encoding'.
+ self error:'bad/unsupported utf encoding'.
^ nil.
]
].
@@ -224,7 +231,16 @@
CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ]
CharacterArray fromUTF8Bytes:#[ 16rC1 16r02 ]
CharacterArray fromUTF8Bytes:#[ 16rE0 16r81 16r02 ]
- CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ]
+ CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ]
+
+ RFC 2279 examples:
+ CharacterArray fromUTF8Bytes:#[ 16r41 16rE2 16r89 16rA2 16rCE 16r91 16r2E ]
+ CharacterArray fromUTF8Bytes:#[ 16rED 16r95 16r9C 16rEA 16rB5 16rAD 16rEC 16r96 16rB4 ]
+ CharacterArray fromUTF8Bytes:#[ 16rE6 16r97 16rA5 16rE6 16r9C 16rAC 16rE8 16rAA 16r9E ]
+
+ invalid:
+ CharacterArray fromUTF8Bytes:#[ 16rC0 16r80 ]
+ CharacterArray fromUTF8Bytes:#[ 16rE0 16r80 16r80 ]
"
!
@@ -4878,7 +4894,7 @@
c := Character utf8DecodeFrom:in.
is16Bit ifFalse:[
c asciiValue > 16rFF ifTrue:[
- out := WriteStream with:(out contents asTwoByteString).
+ out := WriteStream with:(UnicodeString fromString:out contents).
is16Bit := true.
].
].
@@ -6211,7 +6227,7 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.228 2002-10-22 18:00:37 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.229 2002-10-22 18:14:36 cg Exp $'
! !
CharacterArray initialize!