--- a/Character.st Tue Jul 21 06:19:13 2015 +0100
+++ b/Character.st Tue Jul 21 06:19:27 2015 +0100
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
"
COPYRIGHT (c) 1988 by Claus Gittinger
All Rights Reserved
@@ -198,107 +196,94 @@
utf8DecodeFrom:aStream
"read and return a single unicode character from an UTF8 encoded stream"
- |fetchNext c1 c2 c3 c4 c5 codePoint|
+ |fetchNext c1 c2 codePoint|
c1 := aStream next.
codePoint := c1 codePoint.
codePoint <= 16r7F ifTrue:[
- "/ 0xxxxxxx - 7 bits
- ^ c1.
+ "/ 0xxxxxxx - 7 bits
+ ^ c1 asCharacter.
].
(codePoint bitAnd:2r11000000) == 2r10000000 ifTrue:[
- "/ out of sync (got an intermediate character)
- InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
- ^ c1.
+ "/ out of sync (got an intermediate character)
+ InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
+ ^ c1 asCharacter.
].
- fetchNext := [ |ch|
- ch := aStream next.
- (ch codePoint bitAnd:2r11000000) == 2r10000000 ifFalse:[
- "/ followup chars must have 2r10 in high bits
- InvalidEncodingError raiseRequestWith:ch codePoint.
- ^ c1.
- ].
- ch
- ].
+ fetchNext := [ |code|
+ code := aStream next codePoint.
+ (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
+ "/ followup chars must have 2r10 in high bits
+ InvalidEncodingError raiseRequestWith:code.
+ ^ c1 asCharacter.
+ ].
+ code bitAnd:16r3F
+ ].
(codePoint bitAnd:2r11100000) == 2r11000000 ifTrue:[
- "/ 110xxxxx 10xxxxxx - 11 bits
- c2 := fetchNext value.
- codePoint := c1 codePoint bitAnd:16r1F.
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- codePoint <= 16r7F ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 110xxxxx 10xxxxxx - 11 bits
+ codePoint := codePoint bitAnd:16r1F.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r7F ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11110000) == 2r11100000 ifTrue:[
- "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
- c2 := fetchNext value.
- c3 := fetchNext value.
- codePoint := c1 codePoint bitAnd:16r0F.
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
- codePoint <= 16r7FF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
+ codePoint := codePoint bitAnd:16r0F.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r7FF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
- "/ notice: currently, characters can only have 16bit encoding;
- "/ therefore the following will raise a runtime exception,
-
(codePoint bitAnd:2r11111000) == 2r11110000 ifTrue:[
- "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
- c2 := fetchNext value.
- c3 := fetchNext value.
- c4 := fetchNext value.
- codePoint := c1 codePoint bitAnd:16r07.
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c4 codePoint bitAnd:16r3F).
- codePoint <= 16rFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
+ codePoint := codePoint bitAnd:16r07.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16rFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11111100) == 2r11111000 ifTrue:[
- "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
- c2 := fetchNext value.
- c3 := fetchNext value.
- c4 := fetchNext value.
- c5 := fetchNext value.
- codePoint := c1 codePoint bitAnd:16r03.
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c4 codePoint bitAnd:16r3F).
- codePoint := (codePoint bitShift:6) bitOr:(c5 codePoint bitAnd:16r3F).
- codePoint <= 16r1FFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
+ codePoint := codePoint bitAnd:16r03.
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+ codePoint <= 16r1FFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
(codePoint bitAnd:2r11111110) == 2r11111100 ifTrue:[
- "/ 1111110x ... 10xxxxxx - any number of bits
- codePoint := c1 codePoint bitAnd:16r01.
-
- c2 := aStream peek.
- [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
- codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
- aStream next.
- c2 := aStream peek.
- ].
- codePoint <= 16r3FFFFFF ifTrue:[
- InvalidEncodingError raiseRequestWith:codePoint.
- ].
- ^ Character codePoint:codePoint
+ "/ 1111110x ... 10xxxxxx - any number of bits
+ codePoint := codePoint bitAnd:16r01.
+
+ c2 := aStream peek.
+ [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
+ codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
+ aStream next.
+ c2 := aStream peek.
+ ].
+ codePoint <= 16r3FFFFFF ifTrue:[
+ InvalidEncodingError raiseRequestWith:codePoint.
+ ].
+ ^ Character codePoint:codePoint
].
InvalidEncodingError raiseRequestWith:codePoint.
- ^ c1
+ ^ c1 asCharacter.
"
Character utf8DecodeFrom:'a' readStream
@@ -310,12 +295,12 @@
|utf8Encoding original readBack|
1 to:16rFFFF do:[:codePoint |
- original := Character value:codePoint.
- utf8Encoding := original asString utf8Encoded.
- readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
- readBack codePoint = codePoint ifFalse:[
- self halt
- ]
+ original := Character value:codePoint.
+ utf8Encoding := original utf8Encoded.
+ readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
+ readBack codePoint = codePoint ifFalse:[
+ self halt
+ ]
]
"
!
@@ -1472,7 +1457,7 @@
^ s contents
"
- 'ä' utf8Encoded
+ 'ä' utf8Encoded
"
! !
@@ -2534,9 +2519,9 @@
"
$e asNonDiacritical
- $é asNonDiacritical
- $ä asNonDiacritical
- $Ã¥ asNonDiacritical
+ $é asNonDiacritical
+ $ä asNonDiacritical
+ $å asNonDiacritical
"
!