#BUGFIX by stefan
class: Character class
changed: #utf8DecodeFrom:
nil-handling: answer nil if the stream's #next answers nil, instead of sending #codePoint to nil
--- a/Character.st Tue Jan 09 17:45:56 2018 +0100
+++ b/Character.st Wed Jan 10 10:32:58 2018 +0100
@@ -200,92 +200,96 @@
!
utf8DecodeFrom:aStream
- "read and return a single unicode character from an UTF8 encoded stream"
+ "read and return a single unicode character from an UTF8 encoded stream.
+ Answer nil, if Stream>>#next answers nil."
|fetchNext c1 c2 codePoint|
c1 := aStream next.
+ c1 isNil ifTrue:[
+ ^ nil.
+ ].
codePoint := c1 codePoint.
codePoint <= 16r7F ifTrue:[
- "/ 0xxxxxxx - 7 bits
- ^ c1 asCharacter.
+ "/ 0xxxxxxx - 7 bits
+ ^ c1 asCharacter.
].
(codePoint bitAnd:2r11000000) == 2r10000000 ifTrue:[
- "/ out of sync (got an intermediate character)
- InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
- ^ c1 asCharacter.
+ "/ out of sync (got an intermediate character)
+ InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
+ ^ c1 asCharacter.
].
fetchNext := [ |code|
- code := aStream next codePoint.
- (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
- "/ followup chars must have 2r10 in high bits
- InvalidEncodingError raiseRequestWith:code.
- ^ c1 asCharacter.
- ].
- code bitAnd:16r3F
- ].
+ code := aStream next codePoint.
+ (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
+ "/ followup chars must have 2r10 in high bits
+ InvalidEncodingError raiseRequestWith:code.
+ ^ c1 asCharacter.
+ ].
+ code bitAnd:16r3F
+ ].
(codePoint bitAnd:2r11100000) == 2r11000000 ifTrue:[
- "/ 110xxxxx 10xxxxxx - 11 bits
- codePoint := codePoint bitAnd:16r1F.
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint <= 16r7F ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 110xxxxx 10xxxxxx - 11 bits
+ codePoint := codePoint bitAnd:16r1F.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r7F ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11110000) == 2r11100000 ifTrue:[
- "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
- codePoint := codePoint bitAnd:16r0F.
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint <= 16r7FF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
+ codePoint := codePoint bitAnd:16r0F.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r7FF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11111000) == 2r11110000 ifTrue:[
- "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
- codePoint := codePoint bitAnd:16r07.
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint <= 16rFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
+ codePoint := codePoint bitAnd:16r07.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16rFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11111100) == 2r11111000 ifTrue:[
- "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
- codePoint := codePoint bitAnd:16r03.
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
- codePoint <= 16r1FFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
+ codePoint := codePoint bitAnd:16r03.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r1FFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11111110) == 2r11111100 ifTrue:[
- "/ 1111110x ... 10xxxxxx - any number of bits
- codePoint := codePoint bitAnd:16r01.
-
- c2 := aStream peek.
- [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- aStream next.
- c2 := aStream peek.
- ].
- codePoint <= 16r3FFFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 1111110x ... 10xxxxxx - any number of bits
+ codePoint := codePoint bitAnd:16r01.
+
+ c2 := aStream peek.
+ [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
+ codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
+ aStream next.
+ c2 := aStream peek.
+ ].
+ codePoint <= 16r3FFFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
InvalidEncodingError raiseRequestWith:codePoint.
@@ -301,14 +305,16 @@
|utf8Encoding original readBack|
1 to:16rFFFF do:[:codePoint |
- original := Character value:codePoint.
- utf8Encoding := original utf8Encoded.
- readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
- readBack codePoint = codePoint ifFalse:[
- self halt
- ]
+ original := Character value:codePoint.
+ utf8Encoding := original utf8Encoded.
+ readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
+ readBack codePoint = codePoint ifFalse:[
+ self halt
+ ]
]
"
+
+ "Modified (comment): / 10-01-2018 / 10:31:03 / stefan"
!
value:anInteger
@@ -317,6 +323,7 @@
^ self codePoint:anInteger
! !
+
!Character class methodsFor:'accessing untypeable characters'!
controlCharacter:char
@@ -375,6 +382,7 @@
^ self codePoint:41
! !
+
!Character class methodsFor:'constants'!
backspace