Character.st
changeset 20015 e91971b89af2
parent 19810 eb006895e0f9
child 20016 40b9d147b855
--- a/Character.st	Mon Jun 20 13:35:15 2016 +0200
+++ b/Character.st	Mon Jun 20 14:05:34 2016 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1988 by Claus Gittinger
 	      All Rights Reserved
@@ -311,7 +313,6 @@
     ^ self codePoint:anInteger
 ! !
 
-
 !Character class methodsFor:'accessing untypeable characters'!
 
 controlCharacter:char
@@ -358,7 +359,6 @@
     ^ self codePoint:41
 ! !
 
-
 !Character class methodsFor:'constants'!
 
 backspace
@@ -648,7 +648,6 @@
       or:[ (asciivalue == 247 ) ]]]]]
 ! !
 
-
 !Character methodsFor:'accessing'!
 
 codePoint
@@ -1499,7 +1498,7 @@
     ^ s contents
 
     "
-     'ä' utf8Encoded 
+     'ä' utf8Encoded 
      'a' utf8Encoded 
     "
 ! !
@@ -1798,6 +1797,74 @@
     asciivalue <= 16rFF ifTrue:[^ String].
     asciivalue <= 16rFFFF ifTrue:[^ Unicode16String].
     ^ Unicode32String
+!
+
+unicodeBlock
+    "return a symbol naming the unicode block which contains the receiver's asciivalue; #OTHER for ranges not listed below, nil for 16r0860..16r089F"
+
+    asciivalue <= 16r007F ifTrue:[ ^ #BASIC_LATIN ].
+    asciivalue <= 16r00FF ifTrue:[ ^ #LATIN1_SUPPLEMENT ].
+    asciivalue <= 16r017F ifTrue:[ ^ #LATIN1_EXTENDED_A ].    "symbol kept as-is for callers; Unicode names this block Latin Extended-A"
+    asciivalue <= 16r024F ifTrue:[ ^ #LATIN1_EXTENDED_B ].    "symbol kept as-is for callers; Unicode names this block Latin Extended-B"
+    asciivalue <= 16r02AF ifTrue:[ ^ #IPA_EXTENSIONS ].
+    asciivalue <= 16r02FF ifTrue:[ ^ #SPACING_MODIFIER_LETTERS ].
+    asciivalue <= 16r036F ifTrue:[ ^ #COMBINING_DIACRITICAL_MARKS ].
+    asciivalue <= 16r03FF ifTrue:[ ^ #GREEK_AND_COPTIC ].
+    asciivalue <= 16r04FF ifTrue:[ ^ #CYRILLIC ].
+    asciivalue <= 16r052F ifTrue:[ ^ #CYRILLIC_SUPPLEMENT ].
+    asciivalue <= 16r058F ifTrue:[ ^ #ARMENIAN ].
+    asciivalue <= 16r05FF ifTrue:[ ^ #HEBREW ].
+    asciivalue <= 16r06FF ifTrue:[ ^ #ARABIC ].
+    asciivalue <= 16r074F ifTrue:[ ^ #SYRIAC ].
+    asciivalue <= 16r077F ifTrue:[ ^ #ARABIC_SUPPLEMENT ].
+    asciivalue <= 16r07BF ifTrue:[ ^ #THAANA ].
+    asciivalue <= 16r07FF ifTrue:[ ^ #NKO ].
+    asciivalue <= 16r083F ifTrue:[ ^ #SAMARITAN ].
+    asciivalue <= 16r085F ifTrue:[ ^ #MANDAIC ].
+    asciivalue <= 16r089F ifTrue:[ ^ nil ].    "16r0860..16r089F: no block name is answered here; Arabic Extended-A only starts at 16r08A0"
+    asciivalue <= 16r08FF ifTrue:[ ^ #ARABIC_EXTENDED_A ].
+    asciivalue <= 16r097F ifTrue:[ ^ #DEVANAGARI ].
+    asciivalue <= 16r09FF ifTrue:[ ^ #BENGALI ].
+    asciivalue <= 16r0A7F ifTrue:[ ^ #GURMUKHI ].
+    asciivalue <= 16r0AFF ifTrue:[ ^ #GUJARATI ].
+    asciivalue <= 16r0B7F ifTrue:[ ^ #ORIYA ].
+    asciivalue <= 16r0BFF ifTrue:[ ^ #TAMIL ].
+    asciivalue <= 16r0C7F ifTrue:[ ^ #TELUGU ].
+    asciivalue <= 16r0CFF ifTrue:[ ^ #KANNADA ].
+    asciivalue <= 16r0D7F ifTrue:[ ^ #MALAYALAM ].
+    asciivalue <= 16r0DFF ifTrue:[ ^ #SINHALA ].
+    asciivalue <= 16r0E7F ifTrue:[ ^ #THAI ].
+    asciivalue <= 16r0EFF ifTrue:[ ^ #LAO ].
+    asciivalue <= 16r0FFF ifTrue:[ ^ #TIBETAN ].
+    asciivalue <= 16r109F ifTrue:[ ^ #MYANMAR ].
+    asciivalue <= 16r10FF ifTrue:[ ^ #GEORGIAN ].
+    asciivalue <= 16r11FF ifTrue:[ ^ #HANGUL_JAMO ].
+    asciivalue <= 16r137F ifTrue:[ ^ #ETHIOPIC ].
+    asciivalue <= 16r139F ifTrue:[ ^ #ETHIOPIC_SUPPLEMENT ].
+    asciivalue <= 16r13FF ifTrue:[ ^ #CHEROKEE ].
+    asciivalue <= 16r167F ifTrue:[ ^ #UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS ].
+    asciivalue < 16r1AB0 ifTrue:[ ^ #OTHER ].    "16r1680..16r1AAF: blocks in this gap are not named individually"
+    asciivalue <= 16r1AFF ifTrue:[ ^ #COMBINING_DIACRITICAL_MARKS_EXTENDED ].
+    asciivalue < 16r1DC0 ifTrue:[ ^ #OTHER ].    "16r1B00..16r1DBF: not named individually"
+    asciivalue <= 16r1DFF ifTrue:[ ^ #COMBINING_DIACRITICAL_MARKS_SUPPLEMENT ].
+    asciivalue <= 16r1EFF ifTrue:[ ^ #LATIN_EXTENDED_ADDITIONAL ].
+    asciivalue <= 16r1FFF ifTrue:[ ^ #GREEK_EXTENDED ].
+    asciivalue <= 16r206F ifTrue:[ ^ #GENERAL_PUNKTUATION ].    "sic - spelling left unchanged so the answered symbol stays the same"
+    asciivalue <= 16r209F ifTrue:[ ^ #SUPERSCRIPTS_AND_SUBSCRIPTS ].
+    asciivalue <= 16r20CF ifTrue:[ ^ #CURRENCY_SYMBOLS ].
+    asciivalue < 16r2190 ifTrue:[ ^ #OTHER ].    "16r20D0..16r218F: not named individually"
+    asciivalue <= 16r21FF ifTrue:[ ^ #ARROWS ].
+    asciivalue <= 16r22FF ifTrue:[ ^ #MATHEMATICAL_OPERATORS ].
+    asciivalue <= 16r23FF ifTrue:[ ^ #MISCELLANEOUS_TECHNICAL ].
+    asciivalue < 16r2600 ifTrue:[ ^ #OTHER ].    "16r2400..16r25FF: not named individually"
+    asciivalue <= 16r26FF ifTrue:[ ^ #MISCELLANEOUS_SYMBOLS ].
+    asciivalue <= 16r27BF ifTrue:[ ^ #DINGBATS ].
+    asciivalue <= 16r27EF ifTrue:[ ^ #MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A ].
+    ^ #OTHER    "everything above 16r27EF"
+
+    "
+     (Character value:16r200) unicodeBlock
+    "
 ! !
 
 !Character methodsFor:'testing'!
@@ -2569,9 +2636,9 @@
 
     "
      $e asNonDiacritical
-     $é asNonDiacritical
-     $ä asNonDiacritical
-     $å asNonDiacritical
+     $é asNonDiacritical
+     $ä asNonDiacritical
+     $Ã¥ asNonDiacritical
     "
 !