diff -r 72fee17c14b1 -r a055eb19e9d3 CharacterArray.st --- a/CharacterArray.st Tue Oct 22 20:00:37 2002 +0200 +++ b/CharacterArray.st Tue Oct 22 20:14:36 2002 +0200 @@ -157,7 +157,8 @@ "return a new string which represents the characters as decoded from the utf8 encoded bytes, aByteCollection. Returns either a normal String, or a TwoByteString instance. - Only useful, when reading twoByteStrings from external sources." + Only useful, when reading twoByteStrings from external sources. + This only handles up-to 16bit characters" |sz nBitsRequired ascii s byte newString idx| @@ -173,7 +174,10 @@ ascii := (ascii bitShift:6) bitOr:(byte bitAnd:16r3F). ascii > 16rFF ifTrue:[ nBitsRequired := nBitsRequired max:16 - ] + ]. + ascii <= 16r7F ifTrue:[ + self error:'invalid utf encoding'. + ]. ] ifFalse:[ (byte bitAnd:16rF0) == 16rE0 ifTrue:[ ascii := (byte bitAnd:16r0F). @@ -183,9 +187,12 @@ ascii := (ascii bitShift:6) bitOr:(byte bitAnd:16r3F). ascii > 16rFF ifTrue:[ nBitsRequired := nBitsRequired max:16 - ] + ]. + ascii <= 16r7FF ifTrue:[ + self error:'invalid utf encoding'. + ]. ] ifFalse:[ - self error:'bad utf encoding'. + self error:'bad/unsupported utf encoding'. ^ nil. ] ]. @@ -224,7 +231,16 @@ CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ] CharacterArray fromUTF8Bytes:#[ 16rC1 16r02 ] CharacterArray fromUTF8Bytes:#[ 16rE0 16r81 16r02 ] - CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ] + CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ] + + rfc2279 examples: + CharacterArray fromUTF8Bytes:#[ 16r41 16rE2 16r89 16rA2 16rCE 16r91 16r2E ] + CharacterArray fromUTF8Bytes:#[ 16rED 16r95 16r9C 16rEA 16rB5 16rAD 16rEC 16r96 16rB4 ] + CharacterArray fromUTF8Bytes:#[ 16rE6 16r97 16rA5 16rE6 16r9C 16rAC 16rE8 16rAA 16r9E ] + + invalid: + CharacterArray fromUTF8Bytes:#[ 16rC0 16r80 ] + CharacterArray fromUTF8Bytes:#[ 16rE0 16r80 16r80 ] " ! @@ -4878,7 +4894,7 @@ c := Character utf8DecodeFrom:in. is16Bit ifFalse:[ c asciiValue > 16rFF ifTrue:[ - out := WriteStream with:(out contents asTwoByteString). + out := WriteStream with:(UnicodeString fromString:out contents). is16Bit := true. ]. ]. @@ -6211,7 +6227,7 @@ !CharacterArray class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.228 2002-10-22 18:00:37 cg Exp $' + ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.229 2002-10-22 18:14:36 cg Exp $' ! ! CharacterArray initialize!