--- a/Character.st Wed Jun 26 09:39:03 1996 +0200
+++ b/Character.st Thu Jun 27 12:37:56 1996 +0200
@@ -17,7 +17,7 @@
category:'Magnitude-General'
!
-!Character class methodsFor:'documentation'!
+!Character class methodsFor:'documentation'!
copyright
"
@@ -35,15 +35,27 @@
documentation
"
+ This class represents characters.
+ Note that actual character objects are not used when characters
+ are stored in strings, symbols or twoByteStrings; these only store
+ the asciiValue of each character for a more compact representation.
+ The word 'asciiValue' is a historic leftover - actually, any integer
+ code is allowed (i.e. characters are not limited to 8bit).
+
Single byte Characters are unique;
- i.e. for every asciiValue (0..255) there exists exactly one instance of Character,
- which is shared.
- Other characters (i.e. asciivalue > 255) are not shared.
+ i.e. for every asciiValue (0..255) there exists exactly one instance of
+ Character, which is shared (Character value:xxx checks for this, and returns
+ a reference to an existing instance).
+ Other characters (i.e. asciivalue > 255) are not shared; i.e. these
+ are created as required.
This means: you may compare characters using #== ONLY IFF you are certain,
that the characters ranges is 0..255. Otherwise, you HAVE TO compare
using #=. (if in doubt, always compare using #=).
- Sorry for this inconvenience.
+ Sorry for this inconvenience, but it is (practically) impossible to keep
+ the possible maximum of 2^32 characters (Unicode) around, for that
+ convenience alone.
+
Methods marked as (JS) come from the manchester Character goody
(CharacterComparing) by Jan Steinman, which allow Characters to be used as
@@ -69,7 +81,7 @@
"
! !
-!Character class methodsFor:'instance creation'!
+!Character class methodsFor:'instance creation'!
basicNew
"catch new - Characters cannot be created with new"
@@ -119,7 +131,7 @@
self error:'invalid ascii code for character'
! !
-!Character class methodsFor:'constants'!
+!Character class methodsFor:'constants'!
backspace
"return the backspace character"
@@ -218,7 +230,7 @@
^ Character value:9
! !
-!Character class methodsFor:'primitive input'!
+!Character class methodsFor:'primitive input'!
fromUser
"return a character from the keyboard (C's standard input stream)
@@ -235,7 +247,7 @@
%}
! !
-!Character class methodsFor:'queries'!
+!Character class methodsFor:'queries'!
isBuiltInClass
"return true if this class is known by the run-time-system.
@@ -250,10 +262,14 @@
asciiValue
"return the asciivalue of myself.
+ The name 'asciiValue' is a historic leftover - characters are not
+ limited to 8bit characters.
PP has removed this methhod with 4.1 and providing
asInteger instead."
^asciivalue
+
+ "Modified: 27.6.1996 / 12:34:34 / cg"
!
instVarAt:index put:anObject
@@ -269,16 +285,24 @@
Wrap if the resulting value is not a legal Character value. (JS)"
^ Character value:(asciivalue + aMagnitude asInteger \\ 256)
+
+ "
+ $A + 5
+ "
+
+ "Modified: 27.6.1996 / 12:34:51 / cg"
!
- aMagnitude
"Return the Character that is <aMagnitude> lower than the receiver.
Wrap if the resulting value is not a legal Character value. (JS)
- claus: modified to return the difference as integer, if the argument
- is another character"
+ claus:
+ modified to return the difference as integer, if the argument
+ is another character. If the argument is a number, a character is
+ returned."
aMagnitude isCharacter ifTrue:[
- ^ self asciiValue - aMagnitude asciiValue
+ ^ self asciiValue - aMagnitude asciiValue
].
^ Character value:(asciivalue - aMagnitude asInteger \\ 256)
@@ -286,6 +310,8 @@
$z - $a
$d - 3
"
+
+ "Modified: 27.6.1996 / 12:35:34 / cg"
!
// aMagnitude
@@ -409,8 +435,37 @@
"return a character with same letter as the receiver,
but lowercase (the receiver if its lowercase or nonLetter)"
- self isUppercase ifFalse:[^ self].
- ^ Character value:(asciivalue + 32)
+ |code "{Class: SmallInteger }"|
+
+"/ the old code:
+"/ self isUppercase ifFalse:[^ self].
+"/ ^ Character value:(asciivalue + 32)
+
+ code := asciivalue.
+
+ "/ ISO Latin-1
+ ((code >= $A asciiValue) and:[code <= $Z asciiValue]) ifTrue:[
+ ^ Character value:(code + ($a asciiValue - $A asciiValue))
+ ].
+ code < 16r00C0 ifTrue:[^ self].
+ code < 16r0100 ifTrue:[
+ code >= 16r00DF ifTrue:[^ self].
+ code == 16r00D7 ifTrue:[^ self].
+ ^ Character value:(code + 16r20)
+ ].
+
+ "/ hmm - which encoding is this character in?
+ "/ here, assume Unicode
+
+ 'CHARACTER: Unicode support is under construction' infoPrintCR.
+ ^ self
+
+ "
+ $A asLowercase
+ $1 asLowercase
+ "
+
+ "Modified: 27.6.1996 / 12:24:46 / cg"
!
asString
@@ -460,8 +515,39 @@
"return a character with same letter as the receiver,
but uppercase (the receiver if its uppercase or nonLetter)"
- self isLowercase ifFalse:[^ self].
- ^ Character value:(asciivalue - 32)
+ |code "{Class: SmallInteger }"|
+
+"/ the old code:
+"/ self isLowercase ifFalse:[^ self].
+"/ ^ Character value:(asciivalue - 32)
+
+ code := asciivalue.
+
+ "/ ISO Latin-1
+ ((code >= $a asciiValue) and:[code <= $z asciiValue]) ifTrue:[
+ ^ Character value:(code + ($A asciiValue - $a asciiValue))
+ ].
+ code < 16r00E0 ifTrue:[^ self].
+ code < 16r0100 ifTrue:[
+ code == 16r00F7 ifTrue:[^ self]. "/ division
+"/ code == 16r00FF ifTrue:[^ Character value:16r0178]. "/ y diaeresis (no uppercase equivalent in ISO Latin-1)
+ code == 16r00FF ifTrue:[^ self].
+
+ ^ Character value:(code - 16r20)
+ ].
+
+ "/ hmm - which encoding is this character in?
+ "/ here, assume Unicode
+
+ 'CHARACTER: Unicode support is under construction' infoPrintCR.
+ ^ self
+
+ "
+ $a asUppercase
+ $1 asUppercase
+ "
+
+ "Modified: 27.6.1996 / 12:28:28 / cg"
!
digitValue
@@ -837,8 +923,8 @@
^ false
! !
-!Character class methodsFor:'documentation'!
+!Character class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.38 1996-04-25 16:57:23 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.39 1996-06-27 10:37:56 cg Exp $'
! !