Character.st
branchjv
changeset 18610 b9799e74a9c5
parent 18608 7d521f25267c
parent 18605 4f0a114fed00
child 18678 a9b30d72dff9
--- a/Character.st	Tue Jul 21 06:19:13 2015 +0100
+++ b/Character.st	Tue Jul 21 06:19:27 2015 +0100
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
 "
  COPYRIGHT (c) 1988 by Claus Gittinger
 	      All Rights Reserved
@@ -198,107 +196,94 @@
 utf8DecodeFrom:aStream
     "read and return a single unicode character from an UTF8 encoded stream"
 
-    |fetchNext c1 c2 c3 c4 c5 codePoint|
+    |fetchNext c1 c2 codePoint|    "/ c2 is only used as the lookahead element in the 1111110x branch
 
     c1 := aStream next.
     codePoint := c1 codePoint.
     codePoint <= 16r7F ifTrue:[
-	"/ 0xxxxxxx - 7 bits
-	^ c1.
+        "/ 0xxxxxxx - 7 bits; single-element sequence, the lead element itself is the result
+        ^ c1 asCharacter.
     ].
 
     (codePoint bitAnd:2r11000000) == 2r10000000 ifTrue:[
-	"/ out of sync (got an intermediate character)
-	InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
-	^ c1.
+        "/ out of sync (got an intermediate character): 10xxxxxx cannot start a sequence
+        InvalidEncodingError raiseRequestWith:codePoint errorString:' - out of sync'.
+        ^ c1 asCharacter.    "/ reached only if the error request returns; answers the offending element
     ].
 
-    fetchNext := [  |ch|
-		    ch := aStream next.
-		    (ch codePoint bitAnd:2r11000000) == 2r10000000 ifFalse:[
-			"/ followup chars must have 2r10 in high bits
-			InvalidEncodingError raiseRequestWith:ch codePoint.
-			^ c1.
-		    ].
-		    ch
-		 ].
+    fetchNext := [  |code|    "/ reads one followup element and answers its 6 payload bits
+                    code := aStream next codePoint.
+                    (code bitAnd:2r11000000) == 2r10000000 ifFalse:[
+                        "/ followup chars must have 2r10 in high bits
+                        InvalidEncodingError raiseRequestWith:code.
+                        ^ c1 asCharacter.    "/ non-local return: leaves utf8DecodeFrom: itself, not just this block
+                    ].
+                    code bitAnd:16r3F    "/ block value: the xxxxxx part of 10xxxxxx
+                 ].
 
     (codePoint bitAnd:2r11100000) == 2r11000000 ifTrue:[
-	"/ 110xxxxx 10xxxxxx - 11 bits
-	c2 := fetchNext value.
-	codePoint := c1 codePoint bitAnd:16r1F.
-	codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	codePoint <= 16r7F ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 110xxxxx 10xxxxxx - 11 bits
+        codePoint := codePoint bitAnd:16r1F.    "/ 5 payload bits of the lead element
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).    "/ append the 6 payload bits of the next element
+        codePoint <= 16r7F ifTrue:[    "/ overlong form: the value fits the 7 bit single-element case above
+            InvalidEncodingError raiseRequestWith:codePoint.    "/ no early return: if the request returns, the decoded character is still answered
+        ].
+        ^ Character codePoint:codePoint
     ].
     (codePoint bitAnd:2r11110000) == 2r11100000 ifTrue:[
-	"/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
-	c2 := fetchNext value.
-	c3 := fetchNext value.
-	codePoint := c1 codePoint bitAnd:16r0F.
-	codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
-	codePoint <= 16r7FF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
+        codePoint := codePoint bitAnd:16r0F.    "/ 4 payload bits of the lead element
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16r7FF ifTrue:[    "/ overlong form: the value fits the 11 bit form above
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
-    "/ notice: currently, characters can only have 16bit encoding;
-    "/ therefore the following will raise a runtime exception,
-
     (codePoint bitAnd:2r11111000) == 2r11110000 ifTrue:[
-	"/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
-	c2 := fetchNext value.
-	c3 := fetchNext value.
-	c4 := fetchNext value.
-	codePoint := c1 codePoint bitAnd:16r07.
-	codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c4 codePoint bitAnd:16r3F).
-	codePoint <= 16rFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
+        codePoint := codePoint bitAnd:16r07.    "/ 3 payload bits of the lead element
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16rFFFF ifTrue:[    "/ overlong form: the value fits the 16 bit form above
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     (codePoint bitAnd:2r11111100) == 2r11111000 ifTrue:[
-	"/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
-	c2 := fetchNext value.
-	c3 := fetchNext value.
-	c4 := fetchNext value.
-	c5 := fetchNext value.
-	codePoint := c1 codePoint bitAnd:16r03.
-	codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c3 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c4 codePoint bitAnd:16r3F).
-	codePoint := (codePoint bitShift:6) bitOr:(c5 codePoint bitAnd:16r3F).
-	codePoint <= 16r1FFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
+        codePoint := codePoint bitAnd:16r03.    "/ 2 payload bits of the lead element
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint := (codePoint bitShift:6) bitOr:(fetchNext value).
+        codePoint <= 16r1FFFFF ifTrue:[    "/ overlong form: the value fits the 21 bit form above
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     (codePoint bitAnd:2r11111110) == 2r11111100 ifTrue:[
-	"/ 1111110x ... 10xxxxxx - any number of bits
-	codePoint := c1 codePoint bitAnd:16r01.
-
-	c2 := aStream peek.
-	[c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[
-	    codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
-	    aStream next.
-	    c2 := aStream peek.
-	].
-	codePoint <= 16r3FFFFFF ifTrue:[
-	    InvalidEncodingError raiseRequestWith:codePoint.
-	].
-	^ Character codePoint:codePoint
+        "/ 1111110x ... 10xxxxxx - any number of bits; does not use fetchNext, the followup count is open-ended
+        codePoint := codePoint bitAnd:16r01.    "/ 1 payload bit of the lead element
+
+        c2 := aStream peek.    "/ look at the next element before deciding whether to consume it
+        [c2 notNil and:[(c2 codePoint bitAnd:2r11000000) == 2r10000000]] whileTrue:[    "/ stop on a nil peek or on the first non-10xxxxxx element
+            codePoint := (codePoint bitShift:6) bitOr:(c2 codePoint bitAnd:16r3F).
+            aStream next.    "/ consume the element that was just merged in
+            c2 := aStream peek.
+        ].
+        codePoint <= 16r3FFFFFF ifTrue:[    "/ overlong form: the value fits the 26 bit form above
+            InvalidEncodingError raiseRequestWith:codePoint.
+        ].
+        ^ Character codePoint:codePoint
     ].
 
     InvalidEncodingError raiseRequestWith:codePoint.
-    ^ c1
+    ^ c1 asCharacter.    "/ none of the bit patterns above matched (for 8 bit input: 1111111x)
 
     "
       Character utf8DecodeFrom:'a' readStream
@@ -310,12 +295,12 @@
       |utf8Encoding original readBack|
 
       1 to:16rFFFF do:[:codePoint |
-	original := Character value:codePoint.
-	utf8Encoding := original asString utf8Encoded.
-	readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
-	readBack codePoint = codePoint ifFalse:[
-	    self halt
-	]
+        original := Character value:codePoint.
+        utf8Encoding := original utf8Encoded.
+        readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
+        readBack codePoint = codePoint ifFalse:[
+            self halt
+        ]
       ]
     "
 !
@@ -1472,7 +1457,7 @@
     ^ s contents
 
     "
-	'ä' utf8Encoded
+	'ä' utf8Encoded
     "
 ! !
 
@@ -2534,9 +2519,9 @@
 
     "
      $e asNonDiacritical
-     $é asNonDiacritical
-     $ä asNonDiacritical
-     $Ã¥ asNonDiacritical
+     $é asNonDiacritical
+     $ä asNonDiacritical
+     $å asNonDiacritical
     "
 !