Character.st
changeset 1491 a42ae3fbb756
parent 1295 83f594f05c52
child 2124 6238280f6120
--- a/Character.st	Wed Jun 26 09:39:03 1996 +0200
+++ b/Character.st	Thu Jun 27 12:37:56 1996 +0200
@@ -17,7 +17,7 @@
 	category:'Magnitude-General'
 !
 
-!Character class methodsFor:'documentation'!
+!Character  class methodsFor:'documentation'!
 
 copyright
 "
@@ -35,15 +35,27 @@
 
 documentation
 "
+    This class represents characters.
+    Notice, that actual character objects are not used when characters
+    are stored in strings, symbols or twoByteStrings; these only store
+    the asciiValue of each character for a more compact representation.
+    The word 'asciiValue' is a historic leftover - actually, any integer
+    code is allowed (i.e. characters are not limited to 8bit).
+
     Single byte Characters are unique; 
-    i.e. for every asciiValue (0..255) there exists exactly one instance of Character, 
-    which is shared.
-    Other characters (i.e. asciivalue > 255) are not shared.
+    i.e. for every asciiValue (0..255) there exists exactly one instance of 
+    Character, which is shared (Character value:xxx checks for this, and returns 
+    a reference to an existing instance).
+    Other characters (i.e. asciivalue > 255) are not shared; i.e. these
+    are created as required.
 
     This means: you may compare characters using #== ONLY IFF you are certain,
     that the characters ranges is 0..255. Otherwise, you HAVE TO compare
     using #=. (if in doubt, always compare using #=).
-    Sorry for this inconvenience.
+    Sorry for this inconvenience, but it is (practically) impossible to keep
+    the possible maximum of 2^32 characters (Unicode) around, for that
+    convenience alone.
+
 
     Methods marked as (JS) come from the manchester Character goody
     (CharacterComparing) by Jan Steinman, which allow Characters to be used as
@@ -69,7 +81,7 @@
 "
 ! !
 
-!Character class methodsFor:'instance creation'!
+!Character  class methodsFor:'instance creation'!
 
 basicNew
     "catch new - Characters cannot be created with new"
@@ -119,7 +131,7 @@
     self error:'invalid ascii code for character'
 ! !
 
-!Character class methodsFor:'constants'!
+!Character  class methodsFor:'constants'!
 
 backspace
     "return the backspace character"
@@ -218,7 +230,7 @@
     ^ Character value:9
 ! !
 
-!Character class methodsFor:'primitive input'!
+!Character  class methodsFor:'primitive input'!
 
 fromUser
     "return a character from the keyboard (C's standard input stream)
@@ -235,7 +247,7 @@
 %}
 ! !
 
-!Character class methodsFor:'queries'!
+!Character  class methodsFor:'queries'!
 
 isBuiltInClass
     "return true if this class is known by the run-time-system.
@@ -250,10 +262,14 @@
 
 asciiValue
     "return the asciivalue of myself.
+     The name 'asciiValue' is a historic leftover - characters are not
+     limited to 8bit characters.
      PP has removed this methhod with 4.1 and providing
      asInteger instead."
 
     ^asciivalue
+
+    "Modified: 27.6.1996 / 12:34:34 / cg"
 !
 
 instVarAt:index put:anObject
@@ -269,16 +285,24 @@
      Wrap if the resulting value is not a legal Character value. (JS)"
 
     ^ Character value:(asciivalue + aMagnitude asInteger \\ 256)
+
+    "
+     $A + 5
+    "
+
+    "Modified: 27.6.1996 / 12:34:51 / cg"
 !
 
 - aMagnitude
     "Return the Character that is <aMagnitude> lower than the receiver.  
      Wrap if the resulting value is not a legal Character value. (JS)
-     claus: modified to return the difference as integer, if the argument
-	    is another character"
+     claus: 
+        modified to return the difference as integer, if the argument
+        is another character. If the argument is a number, a character is
+        returned."
 
     aMagnitude isCharacter ifTrue:[
-	^ self asciiValue - aMagnitude asciiValue
+        ^ self asciiValue - aMagnitude asciiValue
     ].
     ^ Character value:(asciivalue - aMagnitude asInteger \\ 256)
 
@@ -286,6 +310,8 @@
      $z - $a  
      $d - 3
     "
+
+    "Modified: 27.6.1996 / 12:35:34 / cg"
 !
 
 // aMagnitude
@@ -409,8 +435,37 @@
     "return a character with same letter as the receiver,
      but lowercase (the receiver if its lowercase or nonLetter)"
 
-    self isUppercase ifFalse:[^ self].
-    ^ Character value:(asciivalue + 32)
+    |code "{Class: SmallInteger }"|
+
+"/ the old code:
+"/    self isUppercase ifFalse:[^ self].
+"/    ^ Character value:(asciivalue + 32)
+
+    code := asciivalue.
+
+    "/ ISO Latin-1
+    ((code >= $A asciiValue) and:[code <= $Z asciiValue]) ifTrue:[
+        ^ Character value:(code + ($a asciiValue - $A asciiValue))
+    ].
+    code < 16r00C0 ifTrue:[^ self].
+    code < 16r0100 ifTrue:[
+        code >= 16r00DF ifTrue:[^ self].
+        code == 16r00D7 ifTrue:[^ self].
+        ^ Character value:(code + 16r20)
+    ].
+
+    "/ mhmh - in which encoding is this character.
+    "/ here, assume Unicode
+
+    'CHARACTER: Unicode support is under construction' infoPrintCR.
+    ^ self
+
+    "
+     $A asLowercase 
+     $1 asLowercase  
+    "
+
+    "Modified: 27.6.1996 / 12:24:46 / cg"
 !
 
 asString
@@ -460,8 +515,39 @@
     "return a character with same letter as the receiver,
      but uppercase (the receiver if its uppercase or nonLetter)"
 
-    self isLowercase ifFalse:[^ self].
-    ^ Character value:(asciivalue - 32)
+    |code "{Class: SmallInteger }"|
+
+"/ the old code:
+"/    self isLowercase ifFalse:[^ self].
+"/    ^ Character value:(asciivalue - 32)
+
+    code := asciivalue.
+
+    "/ ISO Latin-1
+    ((code >= $a asciiValue) and:[code <= $z asciiValue]) ifTrue:[
+        ^ Character value:(code + ($A asciiValue - $a asciiValue))
+    ].
+    code < 16r00E0 ifTrue:[^ self].
+    code < 16r0100 ifTrue:[
+        code == 16r00F7 ifTrue:[^ self]. "/ division
+"/        code == 16r00FF ifTrue:[^ Character value:16r0178].  "/ y diaeresis (no uppercase equivalent in ISO Latin-1)
+        code == 16r00FF ifTrue:[^ self]. 
+
+        ^ Character value:(code - 16r20)
+    ].
+
+    "/ mhmh - in which encoding is this character.
+    "/ here, assume Unicode
+
+    'CHARACTER: Unicode support is under construction' infoPrintCR.
+    ^ self
+
+    "
+     $a asUppercase 
+     $1 asUppercase  
+    "
+
+    "Modified: 27.6.1996 / 12:28:28 / cg"
 !
 
 digitValue
@@ -837,8 +923,8 @@
     ^ false
 ! !
 
-!Character class methodsFor:'documentation'!
+!Character  class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.38 1996-04-25 16:57:23 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.39 1996-06-27 10:37:56 cg Exp $'
 ! !