#TUNING by stefan
class: UninterpretedBytes
comment/format in: #utf8DecodedSize
changed: #utf8DecodedWithTwoByteCharactersReplacedBy:
use #utf8DecodedSize when allocating buffer
--- a/UninterpretedBytes.st Tue Jan 02 20:04:18 2018 +0100
+++ b/UninterpretedBytes.st Tue Jan 02 20:05:18 2018 +0100
@@ -4423,30 +4423,34 @@
the decoded string. Suppress all 2-byte (above 16rFF) characters,
and replace them with replacementCharacter"
- |in out c|
+ |in out|
self containsNon7BitAscii ifFalse:[
- ^ self asSingleByteString
+ ^ self asSingleByteString "plain ASCII"
].
- out := WriteStream on:(String uninitializedNew:self size * 3 // 2).
+ out := WriteStream on:(String uninitializedNew:self utf8DecodedSize).
in := self readStream.
[in atEnd] whileFalse:[
- c := Character utf8DecodeFrom:in.
- c codePoint > 16rFF ifTrue:[
- c := replacementCharacter
- ].
- out nextPut:c.
+ |c|
+
+ c := Character utf8DecodeFrom:in.
+ c codePoint > 16rFF ifTrue:[
+ c := replacementCharacter
+ ].
+ out nextPut:c.
].
^ out contents
"
(Character value:16r220) utf8Encoded
- utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)
+ utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)
(Character value:16r220) utf8Encoded asExternalBytes copyButLast
- utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)
- "
+ utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)
+ "
+
+ "Modified (comment): / 02-01-2018 / 18:54:18 / stefan"
! !
!UninterpretedBytes methodsFor:'filling & replacing'!
@@ -5171,7 +5175,7 @@
utf8DecodedSize
"return the number of charcters needed wnen this string is
- decoded from UTL-8"
+ decoded from UTF-8"
|sz "{ Class:SmallInteger }"
cnt "{ Class:SmallInteger }"|
@@ -5180,10 +5184,10 @@
cnt := 0.
1 to:sz do:[:idx|
- "/ count the number of UTF-8 start bytes
- ((self byteAt:idx) bitAnd:16rC0) ~~ 16r80 ifTrue:[
- cnt := cnt+1.
- ].
+ "/ count the number of UTF-8 start bytes
+ ((self byteAt:idx) bitAnd:16rC0) ~~ 16r80 ifTrue:[
+ cnt := cnt+1.
+ ].
].
^ cnt.
@@ -5195,6 +5199,7 @@
"Created: / 07-02-2017 / 15:03:07 / stefan"
"Modified: / 07-02-2017 / 19:14:06 / stefan"
+ "Modified (comment): / 02-01-2018 / 18:30:27 / stefan"
! !
!UninterpretedBytes methodsFor:'testing'!