#TUNING by stefan
author Stefan Vogel <sv@exept.de>
Tue, 07 Feb 2017 22:18:51 +0100
changeset 21377 a808dd6d6b4b
parent 21376 c0ba4a6db200
child 21378 1aa625b936ed
#TUNING by stefan class: UninterpretedBytes added: #utf8DecodedSize comment/format in: #utf8Decoded changed: #bcdByteAt:put: #byteAt: #byteAt:put: #signedByteAt: #signedByteAt:put: declare primitives NOCONTEXT
UninterpretedBytes.st
--- a/UninterpretedBytes.st	Tue Feb 07 22:15:28 2017 +0100
+++ b/UninterpretedBytes.st	Tue Feb 07 22:18:51 2017 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1993 by Claus Gittinger
               All Rights Reserved
@@ -930,7 +932,7 @@
 longLongAt:index
     "return the 8-bytes starting at index as a signed Integer.
      The index is a smalltalk index (i.e. 1-based).
-     The value is retrieved in the machineÄs natural byte order.
+     The value is retrieved in the machineÄs natural byte order.
      This may be worth a primitive."
 
     ^ self signedInt64At:index MSB:IsBigEndian
@@ -1925,7 +1927,7 @@
      (i.e. the value n is encoded as: ((n // 10) * 16) + (n \\ 10)"
 
     (aNumber between:0 and:99) ifFalse:[
-        self error:'invalid value for BCD encoding'
+        self elementBoundsError:aNumber.
     ].
     ^ self byteAt:index put:aNumber encodeAsBCD
 
@@ -1937,13 +1939,14 @@
     "
 
     "Modified (comment): / 26-09-2011 / 11:57:36 / cg"
+    "Modified: / 07-02-2017 / 20:12:04 / stefan"
 !
 
 byteAt:byteIndex
     "return the byte at byteIndex as an unsigned 8 bit value in the range 0..255.
      The index is a smalltalk index (i.e. 1-based)."
 
-%{
+%{ /* NOCONTEXT */
     if (__isSmallInteger(byteIndex)) {
         unsigned char *cp;
         INT sz;
@@ -1964,10 +1967,19 @@
 
     "
      |b|
-     b := ByteArray new:3.
-     b at:1 put:16rFF.
-     b at:2 put:16r7F.
-     b at:3 put:16r80.
+     b := String new:3.
+     b byteAt:1 put:16rFF.
+     b byteAt:2 put:16r7F.
+     b byteAt:3 put:16r80.
+     b byteAt:1.    
+     b byteAt:2.     
+     b byteAt:3.     
+
+     |b|
+     b := ExternalBytes new:3.
+     b byteAt:1 put:16rFF.
+     b byteAt:2 put:16r7F.
+     b byteAt:3 put:16r80.
      b byteAt:1.    
      b byteAt:2.     
      b byteAt:3.     
@@ -1975,13 +1987,14 @@
 
     "Modified: / 01-07-1996 / 21:13:53 / cg"
     "Modified (comment): / 26-09-2011 / 11:57:14 / cg"
+    "Modified (comment): / 07-02-2017 / 19:49:13 / stefan"
 !
 
 byteAt:byteIndex put:anInteger
     "set the byte at byteIndex as an unsigned 8 bit value in the range 0..255.
      The index is a smalltalk index (i.e. 1-based)."
 
-%{
+%{ /* NOCONTEXT */
     if (__isSmallInteger(byteIndex) && __isSmallInteger(anInteger)) {
         unsigned char *cp;
         INT sz;
@@ -2005,14 +2018,16 @@
 
     "
      |b|
-     b := ByteArray new:3.
+     b := String new:3.
      b byteAt:1 put:16rFF.
      b byteAt:2 put:16r7F.
      b byteAt:3 put:16r80.
-     b signedByteAt:1.    
-     b signedByteAt:2.     
-     b signedByteAt:3.     
-    "
+     b byteAt:1.    
+     b byteAt:2.     
+     b byteAt:3.     
+    "
+
+    "Modified (comment): / 07-02-2017 / 19:32:26 / stefan"
 !
 
 signedByteAt:byteIndex
@@ -2020,7 +2035,7 @@
      The index is a smalltalk index (i.e. 1-based).
      This may be worth a primitive."
 
-%{
+%{ /* NOCONTEXT */
     /*
      * handle the most common cases fast ...
      */
@@ -2055,40 +2070,45 @@
      b at:1 put:16rFF.
      b at:2 put:16r7F.
      b at:3 put:16r80.
-     b byteAt:1.    
-     b byteAt:2.     
-     b byteAt:3.     
+     b signedByteAt:1.    
+     b signedByteAt:2.     
+     b signedByteAt:3.     
     "
 
     "Modified: / 01-07-1996 / 21:13:53 / cg"
     "Modified (comment): / 26-09-2011 / 11:57:14 / cg"
+    "Modified: / 07-02-2017 / 19:25:03 / stefan"
 !
 
-signedByteAt:index put:aSignedByteValue
-    "return the byte at index as a signed 8 bit value in the range -128..+127.
+signedByteAt:byteIndex put:aSignedByteValue
+    "set the byte at byteIndex to aSignedByteValue in the range -128 .. 255
      The index is a smalltalk index (i.e. 1-based).
-     Return the signedByteValue argument.
-     This may be worth a primitive."
-
-    |b "{ Class: SmallInteger }"|
-
-    aSignedByteValue >= 0 ifTrue:[
-        b := aSignedByteValue
-    ] ifFalse:[
-        b := 16r100 + aSignedByteValue
+     Return the signedByteValue argument."
+
+    |b|
+
+    b := aSignedByteValue.
+    b < 0 ifTrue:[
+        b := 16r100 + b
     ].
-    self byteAt:index put:b.
+    self byteAt:byteIndex put:b.
     ^ aSignedByteValue
 
     "
      |b|
      b := ByteArray new:2.
      b signedByteAt:1 put:-1.
-     b at:1
+     b at:1.
+     b signedByteAt:1.
+
+     |b|
+     b := ByteArray new:2.
+     b signedByteAt:1 put:-1.0.
     "
 
     "Modified: / 01-07-1996 / 21:12:37 / cg"
     "Modified (comment): / 26-09-2011 / 11:57:18 / cg"
+    "Modified (comment): / 07-02-2017 / 20:03:46 / stefan"
 ! !
 
 !UninterpretedBytes methodsFor:'accessing-floats & doubles'!
@@ -4266,17 +4286,19 @@
 
     "test:
 
-      |utf8Encoding original readBack|
-
-      1 to:16rFFFF do:[:ascii |
-        original := (Character value:ascii) asString.
+      1 to:16r10FFFF do:[:codepoint |
+        |utf8Encoding original readBack|
+
+        original := (Character value:codepoint) asString.
         utf8Encoding := original utf8Encoded.
         readBack := utf8Encoding utf8Decoded.
-        readBack = original ifFalse:[
+        readBack ~= original ifTrue:[
             self halt
         ]
       ]
     "
+
+    "Modified (comment): / 07-02-2017 / 17:36:08 / stefan"
 !
 
 utf8DecodedWithTwoByteCharactersReplacedBy:replacementCharacter
@@ -4962,8 +4984,8 @@
 
     "
      'abc' isValidUTF8
-     'abcöäü' isValidUTF8
-     'abcöäü' utf8Encoded isValidUTF8
+     'abcöäü' isValidUTF8
+     'abcöäü' utf8Encoded isValidUTF8
      (Character value:16r800) utf8Encoded isValidUTF8
      (Character value:16r1000) utf8Encoded isValidUTF8
      
@@ -5008,6 +5030,34 @@
     ^ super size
 
     "Created: / 5.3.1998 / 10:41:13 / stefan"
+!
+
+utf8DecodedSize
+    "return the number of characters needed when this string is
+     decoded from UTF-8."
+
+    |sz "{ Class:SmallInteger }" 
+     cnt "{ Class:SmallInteger }"|
+
+    sz := self size.   
+    cnt := 0.
+
+    1 to:sz do:[:idx|
+        "/ count the number of UTF-8 start bytes
+        ((self byteAt:idx) bitAnd:16rC0) ~~ 16r80 ifTrue:[
+            cnt := cnt+1.
+        ].
+    ].
+    ^ cnt.
+
+    "
+     'hello world' asByteArray utf8DecodedSize
+     'ä' utf8Encoded asByteArray utf8DecodedSize
+     'äΣΔΨӕἤῴ' utf8Encoded asByteArray utf8DecodedSize
+    "
+
+    "Created: / 07-02-2017 / 15:03:07 / stefan"
+    "Modified: / 07-02-2017 / 19:14:06 / stefan"
 ! !
 
 !UninterpretedBytes methodsFor:'testing'!