#BUGFIX by stefan
author Stefan Vogel <sv@exept.de>
Wed, 10 Jan 2018 10:32:58 +0100
changeset 22409 5a6cfd90e913
parent 22408 09eceae5d786
child 22410 60be3e20e9f0
#BUGFIX by stefan class: Character class changed: #utf8DecodeFrom: nil-handling
Character.st
--- a/Character.st	Tue Jan 09 17:45:56 2018 +0100
+++ b/Character.st	Wed Jan 10 10:32:58 2018 +0100
@@ -200,92 +200,96 @@
 !
 
 utf8DecodeFrom:aStream
-    "read and return a single unicode character from an UTF8 encoded stream"
+    "read and return a single unicode character from an UTF8 encoded stream.
+     Answer nil, if Stream>>#next answers nil."
 
     |fetchNext c1 c2 codePoint|
 
     c1 := aStream next.
+    c1 isNil ifTrue:[
+        ^ nil.
+    ].
     codePoint := c1 codePoint.
     codePoint <= 16r7F ifTrue:[
-	"/ 0xxxxxxx - 7 bits
-	^ c1 asCharacter.
+        "/ 0xxxxxxx - 7 bits
+        ^ c1 asCharacter.
     ].
 
     (codePoint bitAnd:2r11000000) == 2r10000000 ifTrue:[
-	"/ out of sync (got an intermediate character)
-	InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
-	^ c1 asCharacter.
+        "/ out of sync (got an intermediate character)
+        InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
+        ^ c1 asCharacter.
     ].
 
     fetchNext := [  |code|
-		    code := aStream next codePoint.
-		    (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
-			"/ followup chars must have 2r10 in high bits
-			InvalidEncodingError raiseRequestWith:code.
-			^ c1 asCharacter.
-		    ].
-		    code bitAnd:16r3F
-		 ].
+                    code := aStream next codePoint.
+                    (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
+                        "/ followup chars must have 2r10 in high bits
+                        InvalidEncodingError raiseRequestWith:code.
+                        ^ c1 asCharacter.
+                    ].
+                    code bitAnd:16r3F
+                 ].
 
     (codePoint bitAnd:2r11100000) == 2r11000000 ifTrue:[
-	"/ 110xxxxx 10xxxxxx - 11 bits
-	codePoint := codePoint bitAnd:16r1F.
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint <= 16r7F ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 110xxxxx 10xxxxxx - 11 bits
+        codePoint := codePoint bitAnd:16r1F.
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16r7F ifTrue:[
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
     (codePoint bitAnd:2r11110000) == 2r11100000 ifTrue:[
-	"/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
-	codePoint := codePoint bitAnd:16r0F.
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint <= 16r7FF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
+        codePoint := codePoint bitAnd:16r0F.
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16r7FF ifTrue:[
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     (codePoint bitAnd:2r11111000) == 2r11110000 ifTrue:[
-	"/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
-	codePoint := codePoint bitAnd:16r07.
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint <= 16rFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
+        codePoint := codePoint bitAnd:16r07.
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16rFFFF ifTrue:[
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     (codePoint bitAnd:2r11111100) == 2r11111000 ifTrue:[
-	"/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
-	codePoint := codePoint bitAnd:16r03.
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
-	codePoint <= 16r1FFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
+        codePoint := codePoint bitAnd:16r03.
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16r1FFFFF ifTrue:[
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     (codePoint bitAnd:2r11111110) == 2r11111100 ifTrue:[
-	"/ 1111110x ... 10xxxxxx - any number of bits
-	codePoint := codePoint bitAnd:16r01.
-
-	c2 := aStream peek.
-	[c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
-	    codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	    aStream next.
-	    c2 := aStream peek.
-	].
-	codePoint <= 16r3FFFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 1111110x ... 10xxxxxx - any number of bits
+        codePoint := codePoint bitAnd:16r01.
+
+        c2 := aStream peek.
+        [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
+            codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
+            aStream next.
+            c2 := aStream peek.
+        ].
+        codePoint <= 16r3FFFFFF ifTrue:[
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     InvalidEncodingError raiseRequestWith:codePoint.
@@ -301,14 +305,16 @@
       |utf8Encoding original readBack|
 
       1 to:16rFFFF do:[:codePoint |
-	original := Character value:codePoint.
-	utf8Encoding := original utf8Encoded.
-	readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
-	readBack codePoint = codePoint ifFalse:[
-	    self halt
-	]
+        original := Character value:codePoint.
+        utf8Encoding := original utf8Encoded.
+        readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
+        readBack codePoint = codePoint ifFalse:[
+            self halt
+        ]
       ]
     "
+
+    "Modified (comment): / 10-01-2018 / 10:31:03 / stefan"
 !
 
 value:anInteger
@@ -317,6 +323,7 @@
     ^ self codePoint:anInteger
 ! !
 
+
 !Character class methodsFor:'accessing untypeable characters'!
 
 controlCharacter:char
@@ -375,6 +382,7 @@
     ^ self codePoint:41
 ! !
 
+
 !Character class methodsFor:'constants'!
 
 backspace