Class reading optimization [1/4]: optimize CharacterArray class>>decodeFromJavaUTF8:
author Jan Vrany <jan.vrany@fit.cvut.cz>
Mon, 04 Aug 2014 15:43:51 +0100
changeset 3195 0b6a9ff08acd
parent 3192 b6bced0551a9
child 3196 d617b4590431
Class reading optimization [1/4]: optimize CharacterArray class>>decodeFromJavaUTF8: This method now uses hand-optimized inline C to decode bytes. It also now takes an instance of String instead of a ByteArray, to avoid excessive byte copying from byte array to string when all characters are below 0x80. A Unicode16String is created only when the input bytes contain a multi-byte encoded character (i.e. a byte with bit 0x80 set). *** Class reader optimization [3/x]: call self info: conditionally, only if the Verbose flag is set. This avoids an extra message send and a lot of string concatenation. Thanks to Claus Gittinger for spotting this.
JavaClassReader.st
JavaUTF8Tests.st
extensions.st
tests/libjava/src/stx/libjava/tests/StringTests.java
tools/tools.rc
--- a/JavaClassReader.st	Sun Aug 03 23:43:40 2014 +0100
+++ b/JavaClassReader.st	Mon Aug 04 15:43:51 2014 +0100
@@ -578,10 +578,12 @@
                 ] ifFalse:[
                     path := '<memory>'
                 ].
-                self info:
-                    ('class reader: file ', path , ' has version ' ,
-                         majorVsn printString ,
-                            '.' , minorVsn printString)
+                Verbose ifTrue:[ 
+                    self info:
+                        ('class reader: file ', path , ' has version ' ,
+                             majorVsn printString ,
+                                '.' , minorVsn printString)
+                ].
 
             ].
     "/
@@ -724,7 +726,7 @@
     "Modified: / 15-10-2010 / 17:37:38 / Jan Kurs <kurs.jan@post.cz>"
     "Modified: / 28-01-2011 / 15:09:48 / Marcel Hlopko <hlopik@gmail.com>"
     "Modified: / 18-05-2011 / 15:30:29 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
-    "Modified: / 12-11-2013 / 00:39:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:40:43 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readStream:aStream ignoring:classesBeingLoaded
@@ -1496,20 +1498,22 @@
     "/ get constant pool
     "/
     constantPoolSize := inStream nextUnsignedShortMSB: msb.
-    self info: 'constantPoolSize = ' , constantPoolSize printString.
+    Verbose ifTrue:[ 
+        self info: 'constantPoolSize = ' , constantPoolSize printString.
+    ].
     constants := JavaConstantPool new: constantPoolSize - 1.
     constSlot := 1.
     [ constSlot < constantPoolSize ] whileTrue: [
-            self info: 'const slot: ' , constSlot printString.
+            Verbose ifTrue:[ 
+                self info: 'const slot: ' , constSlot printString.
+            ].
             const := self readConstant.
             constants at: constSlot put: const.
 
             "/ long & double consts take 2 slots
             "/ (only first is used)
 
-            constNeeds2Slots
-                ifTrue: [ constSlot := constSlot + 2. ]
-                ifFalse: [ constSlot := constSlot + 1. ]].
+            constSlot := constNeeds2Slots ifTrue: [ constSlot + 2. ] ifFalse: [ constSlot + 1. ]].
     constSlot := -1.
 
     constants do:[:ref|ref isJavaRef ifTrue:[ref preResolve]].
@@ -1532,7 +1536,7 @@
 
     "Modified: / 07-05-1998 / 11:44:06 / cg"
     "Created: / 13-05-2011 / 16:52:54 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
-    "Modified: / 15-10-2012 / 23:15:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:41:37 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_Asciz
@@ -1565,14 +1569,18 @@
     | nameIndex |
 
     nameIndex := inStream nextUnsignedShortMSB: msb.
-    ((constants at: nameIndex) isNil)
-        ifTrue:
-            [ self info: ('reading class; index=' , nameIndex printString , ' name='
-                        , (constants at: nameIndex) printString) ]
-        ifFalse: [ self info: ('reading class; index= ' , nameIndex printString) ].
+    Verbose ifTrue:[ 
+        ((constants at: nameIndex) isNil) ifTrue: [ 
+                self info: ('reading class; index=' , nameIndex printString , ' name='
+                            , (constants at: nameIndex) printString) 
+        ] ifFalse: [ 
+            self info: ('reading class; index= ' , nameIndex printString)   
+        ].
+    ].
     ^ JavaClassRef2 in: constants withNameAt: nameIndex.
 
     "Modified: / 18-05-2011 / 18:21:16 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:39:41 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_Double
@@ -1634,15 +1642,18 @@
 
     classIndex := inStream nextUnsignedShortMSB: msb.
     nameAndTypeIndex := inStream nextUnsignedShortMSB: msb.
-    self
-        info: 'reading fieldref; classindex=' , classIndex printString
-                , ' nameAndTypeIndex=' , nameAndTypeIndex printString.
+    Verbose ifTrue:[
+        self
+            info: 'reading fieldref; classindex=' , classIndex printString
+                    , ' nameAndTypeIndex=' , nameAndTypeIndex printString.
+    ].
     ^ JavaFieldRef2
         in: constants
         withNameAndTypeAt: nameAndTypeIndex
         andClassAt: classIndex.
 
     "Modified: / 15-05-2011 / 14:20:23 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:38:58 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_Float
@@ -1714,15 +1725,18 @@
 
     classIndex := inStream nextUnsignedShortMSB: msb.
     nameAndTypeIndex := inStream nextUnsignedShortMSB: msb.
-    self
-        info: 'reading interface methodref; classindex=' , classIndex printString
-                , ' nameAndTypeIndex=' , nameAndTypeIndex printString.
+    Verbose ifTrue:[
+        self
+            info: 'reading interface methodref; classindex=' , classIndex printString
+                    , ' nameAndTypeIndex=' , nameAndTypeIndex printString.
+    ].
     ^ JavaInterfaceMethodRef2
         in: constants
         withNameAndTypeAt: nameAndTypeIndex
         andClassAt: classIndex.
 
     "Modified: / 12-05-2011 / 18:57:47 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:38:39 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_InvokeDynamic
@@ -1793,15 +1807,18 @@
 
     classIndex := inStream nextUnsignedShortMSB: msb.
     nameAndTypeIndex := inStream nextUnsignedShortMSB: msb.
-    self
-        info: 'reading methodref; classindex=' , classIndex printString , ' nameAndTypeIndex='
-                , nameAndTypeIndex printString.
+    Verbose ifTrue:[
+        self
+            info: 'reading methodref; classindex=' , classIndex printString , ' nameAndTypeIndex='
+                    , nameAndTypeIndex printString.
+    ].
     ^ JavaMethodRef2
         in: constants
         withNameAndTypeAt: nameAndTypeIndex
         andClassAt: classIndex.
 
     "Modified: / 15-05-2011 / 14:20:38 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:38:24 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_NameAndType
@@ -1809,15 +1826,18 @@
 
     nameIndex := inStream nextUnsignedShortMSB: msb.
     descriptorIndex := inStream nextUnsignedShortMSB: msb.
-    self
-        info: 'reading nameAndType; nameindex=' , nameIndex printString
-                , ' descriptorIndex= ' , descriptorIndex printString.
+    Verbose ifTrue:[
+        self
+            info: 'reading nameAndType; nameindex=' , nameIndex printString
+                    , ' descriptorIndex= ' , descriptorIndex printString.
+    ].
     ^JavaNameAndType2
                 in: constants
                 withNameAt: nameIndex
                 andDescriptorAt: descriptorIndex.
 
     "Modified: / 10-05-2011 / 17:09:47 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Modified: / 04-08-2014 / 15:38:08 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
 readConstant_String
@@ -1855,15 +1875,15 @@
 !
 
 readConstant_Utf8
-    | len  bytes  string |
+    | len string |
 
     len := inStream nextUnsignedShortMSB: msb.
-    bytes := ByteArray new: len.
+    string  := String uninitializedNew: len.
     inStream
         nextBytes: len
-        into: bytes
+        into: string
         startingAt: 1.
-    string := CharacterArray fromJavaUTF8Bytes: bytes.
+    string := CharacterArray decodeFromJavaUTF8: string.
     Verbose
         ifTrue:
             [ Transcript
--- a/JavaUTF8Tests.st	Sun Aug 03 23:43:40 2014 +0100
+++ b/JavaUTF8Tests.st	Mon Aug 04 15:43:51 2014 +0100
@@ -55,85 +55,203 @@
 
 !JavaUTF8Tests methodsFor:'loadingTests'!
 
-testSingleByteCharacters
-    |bytes|
+test_S1
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[97 98 99 100] asString.
+
+    self assert: string = 'abcd'
+
+    "Created: / 04-08-2014 / 11:25:46 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S10
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[97 98 99 100 192 128] asString.
 
-    bytes := ByteArray new:1.
-    bytes at:1 put:65.
-    self assert:((CharacterArray decodeFromJavaUTF8:bytes) = 'A').
+    self assert: string size = 5.
+    self assert: string fifth  codePoint = 16r0000.
+
+    "Created: / 04-08-2014 / 11:45:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:05 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
 
-    "Created: / 04-02-2011 / 14:18:10 / Marcel Hlopko <hlopik@gmail.com>"
+test_S11
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[194 128 97 98 99 100] asString.
+
+    self assert: string size = 5.
+    self assert: string first codePoint = 16r0080.
+
+    "Created: / 04-08-2014 / 11:45:24 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:15 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
-testSixByteCharacters
-    | bytes  convertedString  expectedString |
+test_S12
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[97 98 99 100 224 160 128] asString.
+
+    self assert: string size = 5.
+    self assert: string fifth codePoint = 16r0800.
+
+    "Created: / 04-08-2014 / 11:45:59 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:18 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S13
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[224 160 128 97 98 99 100] asString .
+
+    self assert: string size = 5.
+    self assert: string first codePoint = 16r0800.
+
+    "Created: / 04-08-2014 / 11:46:14 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:20 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
 
-    bytes := ByteArray new: 6.
-    bytes at: 1 put: 2r11101101.
-    bytes at: 2 put: 2r10101111.
-    bytes at: 3 put: 2r10111111.
-    bytes at: 4 put: 2r11101101.
-    bytes at: 5 put: 2r10111111.
-    bytes at: 6 put: 2r10101111.
-    expectedString := Unicode32String new: 1.
-    expectedString at: 1
-        put: (Character codePoint: (((((2r11111111111111111111)))))).
-    convertedString := CharacterArray decodeFromJavaUTF8: bytes.
-    self assert: convertedString = expectedString.
+test_S14
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[97 98 99 100 222 170 239 190 170] asString .
+
+    self assert: string size = 6.
+    self assert: string fifth codePoint = 16r07AA.
+    self assert: string sixth codePoint = 16rFFAA.
 
-    "Created: / 04-02-2011 / 15:25:27 / Marcel Hlopko <hlopik@gmail.com>"
-    "Modified: / 13-03-2011 / 15:44:24 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
+    "Created: / 04-08-2014 / 11:47:20 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:23 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S15
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[222 170 239 190 170 97 98 99 100] asString.
+
+    self assert: string size = 6.
+    self assert: string first codePoint = 16r07AA.
+    self assert: string second codePoint = 16rFFAA.
+
+    "Created: / 04-08-2014 / 11:47:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:27 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
-testSixByteCharactersTrap
-    |bytes convertedString expectedString|
+test_S2
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[1 127] asString.
+
+    self assert: string size = 2.
+    self assert: string first  codePoint = 1.
+    self assert: string second codePoint = 16r7F.
+
+    "Created: / 04-08-2014 / 11:26:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:29 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S3
+    
+    | string |
 
-    bytes := ByteArray new:6.
-    bytes at:1 put:2r11101101.
-    bytes at:2 put:2r10101111.
-    bytes at:3 put:2r10111111.
-    bytes at:4 put:2r11101101.
-    bytes at:5 put:2r10101111.
-    bytes at:6 put:2r10111111.
-    expectedString := Unicode32String new:2.
-    expectedString at:1
-        put:(Character codePoint:2r1101101111111111).
-    expectedString at:2
-                    put:(Character codePoint:2r1101101111111111).
-    convertedString := CharacterArray decodeFromJavaUTF8:bytes.
-    self assert:convertedString = expectedString.
+    string := CharacterArray decodeFromJavaUTF8: #[192 128] asString.
+
+    self assert: string size = 1.
+    self assert: string first  codePoint = 16r0000.
+
+    "Created: / 04-08-2014 / 11:27:22 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:32 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
 
-    "Created: / 04-02-2011 / 16:01:32 / Marcel Hlopko <hlopik@gmail.com>"
+test_S4
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[194 128] asString .
+
+    self assert: string size = 1.
+    self assert: string first  codePoint = 16r0080.
+
+    "Created: / 04-08-2014 / 11:27:46 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:34 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
-testThreeByteCharacters
-    |bytes convertedString expectedString|
+test_S5
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[192 128 194 128] asString .
+
+    self assert: string size = 2.
+    self assert: string first  codePoint = 16r0000.
+    self assert: string second codePoint = 16r0080.
 
-    bytes := ByteArray new:3.
-    bytes at:1 put:2r11101111.
-    bytes at:2 put:2r10001111.
-    bytes at:3 put:2r10101111.
-    expectedString := Unicode16String new:1.
-    expectedString at:1 put:(Character codePoint:2r1111001111101111).
-    convertedString := CharacterArray decodeFromJavaUTF8:bytes.    
-    self assert:convertedString = expectedString.
+    "Created: / 04-08-2014 / 11:28:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S6
+    
+    | string |
 
-    "Created: / 04-02-2011 / 15:06:03 / Marcel Hlopko <hlopik@gmail.com>"
+    string := CharacterArray decodeFromJavaUTF8: #[223 191] asString  .
+
+    self assert: string size = 1.
+    self assert: string first  codePoint = 16r07FF.
+
+    "Created: / 04-08-2014 / 11:30:25 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:39 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 !
 
-testTwoByteCharacters
-    |bytes convertedString expectedString|
+test_S7
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8:#[224 160 128] asString .
+
+    self assert: string size = 1.
+    self assert: string first  codePoint = 16r0800.
+
+    "Created: / 04-08-2014 / 11:30:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:47:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+test_S8
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[239 191 191] asString.
 
-    bytes := ByteArray new:2.
-    bytes at:1 put:2r11011111.
-    bytes at:2 put:2r10001111.
-    expectedString := Unicode16String new:1.
-    expectedString at:1 put:(Character codePoint:2r11111001111).
-    convertedString := CharacterArray decodeFromJavaUTF8:bytes.    
-    self assert:convertedString = expectedString.
+    self assert: string size = 1.
+    self assert: string first  codePoint = 16rFFFF.
+
+    "Created: / 04-08-2014 / 11:30:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:48:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
 
-    "Created: / 04-02-2011 / 14:31:26 / Marcel Hlopko <hlopik@gmail.com>"
-    "Modified: / 04-02-2011 / 16:02:16 / Marcel Hlopko <hlopik@gmail.com>"
+test_S9
+    
+    | string |
+
+    string := CharacterArray decodeFromJavaUTF8: #[224 160 128 239 191 191] asString.
+
+    self assert: string size = 2.
+    self assert: string first  codePoint = 16r0800.
+    self assert: string second codePoint = 16rFFFF.
+
+    "Created: / 04-08-2014 / 11:33:17 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:48:07 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 ! !
 
 !JavaUTF8Tests class methodsFor:'documentation'!
--- a/extensions.st	Sun Aug 03 23:43:40 2014 +0100
+++ b/extensions.st	Mon Aug 04 15:43:51 2014 +0100
@@ -417,96 +417,64 @@
 
 !CharacterArray class methodsFor:'encoding & decoding'!
 
-decodeFromJavaUTF8: bytes
+decodeFromJavaUTF8: string8
     "Decodes a string from modified UTF8 encoding
      as used in Java .class files. see
      'The class file format specification', section 4.5.7"
 
-    | string  i  s  b  codePoint  realLength |
-
-    string := String new: bytes size.
-    realLength := bytes size.
-    s := bytes readStream.
-    i := 1.
-    [ s atEnd ] whileFalse:
-            [ b := s next.
-            (b & 2r10000000) == 0
-                ifTrue: [ codePoint := b ]
-                ifFalse:
-                    [ self assert: (b & 2r01000000) = 2r01000000.
-                    (b & 2r00100000) = 0
-                        ifTrue:
-                            [ "two byte utf char"
-                            realLength := realLength - 1.
-                            self assert: s size > 0.
-                            self assert: (b & 2r01000000) = 2r01000000.
-                            string bitsPerCharacter < 16
-                                ifTrue: [ string := Unicode16String fromString: string ].
-                            codePoint := (b & 2r00011111) << 6.
-                            b := s next.
-                            self assert: (b & 2r11000000) = 2r10000000.
-                            codePoint := codePoint + (b & 2r00111111). ]
-                        ifFalse:
-                            [ "at lease 3 byte utf char"
-                            realLength := realLength - 2.
-                            string bitsPerCharacter < 16"was: 32"
-                                ifTrue: [ string := Unicode16String"was: Unicode32String" fromString: string ].
-                            self assert: s size > 1.
-                            (b & 2r00010000) = 0
-                                ifTrue:
-                                    [ | utf32Possible  utf32Value |
-
-                                    "3 or 6 byte utf char"
-                                    self assert: s size > 1.
-                                    s size < 5
-                                        ifTrue: [ utf32Possible := false ]
-                                        ifFalse: [ utf32Possible := true ].
-                                    b ~= 2r11101101 ifTrue: [ utf32Possible := false ].
-                                    codePoint := (b & 2r00001111) << 12.
-                                    b := s next.
-                                    self assert: (b & 2r11000000) = 2r10000000.
-                                    ((b & 2r11110000) = 2r10100000 and: [ utf32Possible ])
-                                        ifTrue: [ utf32Value := 2r00010000 + ((b & 2r00001111) << 16) ]
-                                        ifFalse: [ utf32Possible := false ].
-                                    codePoint := codePoint + ((b & 2r00111111) << 6).
-                                    b := s next.
-                                    self assert: (b & 2r11000000) = 2r10000000.
-                                    utf32Possible
-                                        ifTrue: [ utf32Value := utf32Value + ((b & 2r00111111) << 10) ].
-                                    codePoint := codePoint + (b & 2r00111111).
-                                    utf32Possible
-                                        ifTrue:
-                                            [ | tmpB |
-
-                                            string bitsPerCharacter < 32
-                                                ifTrue: [ string := Unicode32String fromString: string ].
-                                            tmpB := s copy.
-                                            b := tmpB next.
-                                            b = 2r11101101
-                                                ifTrue:
-                                                    [ b := tmpB next.
-                                                    (b & 2r11110000) = 2r10110000
-                                                        ifTrue:
-                                                            [ utf32Value := utf32Value + ((b & 2r00001111) << 6).
-                                                            b := tmpB next.
-                                                            self assert: (b & 2r11000000) = 2r10000000.
-                                                            utf32Value := utf32Value + (b & 2r00111111).
-                                                            codePoint := utf32Value.
-                                                            realLength := realLength - 3. s position: tmpB position.] ] ] ]
-                                ifFalse:
-                                    [ "should not happen, ask mh"
-                                    self halt. ] ] ].
-            string at: i put: (Character codePoint: codePoint).
-            i := i + 1. ].
-    ^ string subString: 1 to: realLength.
-
-    "
-        String decodeFromJavaUTF8: 'Hello world' asByteArray"
+    | string16 |
+%{
+    if (__isString(string8) ) {
+        unsigned char *ptr;
+        unsigned short c;
+        unsigned short* dst;
+        int len;
+
+        ptr = __stringVal(string8);
+        len = 0;
+        while (*ptr) {
+            len++;
+            if ( ( *ptr & 0x80) == 0 ) {
+                ptr++;
+            } else {
+                len--;
+                goto non7bitString;
+            }
+        }
+        RETURN ( string8 );
+    non7bitString:
+        while (*ptr) {
+            len++;
+            ptr += (*ptr & 0x80) ? ((*ptr & 0x20) ? 3 : 2) : 1;
+        }
+
+        string16 = __MKEMPTYUSTRING(len);
+        ptr = __stringVal(string8);
+        dst = __unicode16StringVal(string16);
+        while (*ptr) {
+            int x = *ptr++;
+            if ( x & 0x80 ) {
+                int y = *ptr++;
+                if ( x & 0x20 ) {
+                    int z = *ptr++;
+                    *dst++ = ( (x & 0xf ) << 12 ) + ( ( y & 0x3f ) << 6 ) + ( z & 0x3f);
+                } else {
+                    *dst++ = ( ( x & 0x1f ) << 6 ) + ( y & 0x3f );
+                }
+            } else {
+                *dst++ = x;
+            }
+        }
+        RETURN ( string16 );
+    }
+%}.
+    self primitiveFailed.
 
     "Created: / 22-12-2010 / 23:45:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
     "Modified: / 09-02-2011 / 01:12:25 / Marcel Hlopko <hlopik@gmail.com>"
     "Modified: / 13-03-2011 / 15:52:36 / Marcel Hlopko <hlopkmar@fel.cvut.cz>"
-    "Modified: / 09-12-2011 / 19:49:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:45:26 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+
 ! !
 
 !CharacterArray class methodsFor:'instance creation'!
@@ -516,7 +484,7 @@
      from the modified utf8 encoded bytes as specified in
      The class file format specification, section 4.5.7"
 
-    ^ self decodeFromJavaUTF8:aByteCollection.
+    ^ self decodeFromJavaUTF8:aByteCollection asString.
 
     "
      CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ]
@@ -535,6 +503,7 @@
     "
 
     "Created: / 23-12-2010 / 09:01:32 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 04-08-2014 / 14:48:28 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 ! !
 
 !CharacterArray class methodsFor:'queries'!
--- a/tests/libjava/src/stx/libjava/tests/StringTests.java	Sun Aug 03 23:43:40 2014 +0100
+++ b/tests/libjava/src/stx/libjava/tests/StringTests.java	Mon Aug 04 15:43:51 2014 +0100
@@ -1,18 +1,19 @@
 package stx.libjava.tests;
 
+import org.junit.Test;
+
 import junit.framework.TestCase;
 
 
 public class StringTests extends TestCase {
 	
+	@Test
 	public void test_01() {
-		String s;
-		StringBuilder sb = new StringBuilder();
-		sb.append('\u0531');
-		s = sb.toString();
-		
-		
-		assert(!s.equals("Hi"));
+	    String s = "\u1FFF";
+	    
+	    assertEquals(1, s.length());
+	    assertEquals(0x1FFF, (int)s.charAt(0));
+	    
 	}
 	
 }
--- a/tools/tools.rc	Sun Aug 03 23:43:40 2014 +0100
+++ b/tools/tools.rc	Mon Aug 04 15:43:51 2014 +0100
@@ -3,7 +3,7 @@
 // automagically generated from the projectDefinition: stx_libjava_tools.
 //
 VS_VERSION_INFO VERSIONINFO
-  FILEVERSION     6,2,32767,32767
+  FILEVERSION     6,2,5000,5000
   PRODUCTVERSION  6,2,4,0
 #if (__BORLANDC__)
   FILEFLAGSMASK   VS_FF_DEBUG | VS_FF_PRERELEASE
@@ -20,12 +20,12 @@
     BEGIN
       VALUE "CompanyName", "eXept Software AG\0"
       VALUE "FileDescription", "Smalltalk/X Class library (LIB)\0"
-      VALUE "FileVersion", "6.2.32767.32767\0"
+      VALUE "FileVersion", "6.2.5000.5000\0"
       VALUE "InternalName", "stx:libjava/tools\0"
       VALUE "LegalCopyright", "Copyright Claus Gittinger 1988-2011\nCopyright eXept Software AG 1998-2011\0"
       VALUE "ProductName", "Smalltalk/X\0"
       VALUE "ProductVersion", "6.2.4.0\0"
-      VALUE "ProductDate", "Thu, 24 Jul 2014 20:13:26 GMT\0"
+      VALUE "ProductDate", "Mon, 04 Aug 2014 11:44:03 GMT\0"
     END
 
   END