#FEATURE by stefan
class: Character
added: #utf8BytesPerCharacter
comment/format in: #bytesPerCharacter
changed: #utf8Encoded
--- a/Character.st Tue Feb 07 20:17:46 2017 +0100
+++ b/Character.st Tue Feb 07 22:13:38 2017 +0100
@@ -311,6 +311,7 @@
^ self codePoint:anInteger
! !
+
!Character class methodsFor:'accessing untypeable characters'!
controlCharacter:char
@@ -357,6 +358,7 @@
^ self codePoint:41
! !
+
!Character class methodsFor:'constants'!
backspace
@@ -646,6 +648,8 @@
or:[ (asciivalue == 247 ) ]]]]]
! !
+
+
!Character methodsFor:'accessing'!
codePoint
@@ -1486,10 +1490,10 @@
|s|
asciivalue <= 16r7F ifTrue:[
- ^ self asString.
+ ^ self asString.
].
- s := WriteStream on:(String new:6).
+ s := WriteStream on:(String new:self utf8BytesPerCharacter).
s nextPutUtf8:self.
^ s contents
@@ -1497,6 +1501,8 @@
'ä' utf8Encoded
'a' utf8Encoded
"
+
+ "Modified: / 07-02-2017 / 14:37:06 / stefan"
! !
!Character methodsFor:'copying'!
@@ -1771,6 +1777,8 @@
asciivalue <= 16rFF ifTrue:[^ 1].
asciivalue <= 16rFFFF ifTrue:[^ 2].
^ 4
+
+ "Modified: / 07-02-2017 / 14:36:05 / stefan"
!
characterSize
@@ -1864,6 +1872,21 @@
"
(Character value:16r200) unicodeBlock
"
+!
+
+utf8BytesPerCharacter
+ "return the number of bytes (1..6) I require for storage in utf-8 encoding, using the original scheme of up to 6 bytes; raises an error if my code point exceeds 16r7FFFFFFF"
+
+ asciivalue <= 16r7F ifTrue:[^ 1].          "7 payload bits"
+ asciivalue <= 16r7FF ifTrue:[^ 2].         "11 payload bits"
+ asciivalue <= 16rFFFF ifTrue:[^ 3].        "16 payload bits"
+ asciivalue <= 16r1FFFFF ifTrue:[^ 4].      "21 payload bits"
+ asciivalue <= 16r3FFFFFF ifTrue:[^ 5].     "26 payload bits"
+ asciivalue <= 16r7FFFFFFF ifTrue:[^ 6].    "31 payload bits"
+
+ self error:'character cannot represented as utf8 (too large)'
+
+ "Created: / 07-02-2017 / 14:35:56 / stefan"
! !
!Character methodsFor:'testing'!
@@ -3172,3 +3195,4 @@
version_CVS
^ '$Header$'
! !
+