Character.st
author Claus Gittinger <cg@exept.de>
Thu, 07 Dec 1995 22:32:39 +0100
changeset 699 12f456343eea
parent 544 d78012b20769
child 700 b4ae5ce39bfc
permissions -rw-r--r--
checkin from browser

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

Magnitude subclass:#Character
	 instanceVariableNames:'asciivalue'
	 classVariableNames:''
	 poolDictionaries:''
	 category:'Magnitude-General'
!

!Character class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Single byte Characters are unique; this means that for every asciiValue (0..255) there
    is exactly one instance of Character, which is shared.
    Other characters (i.e. asciivalue > 255) are not shared.

    This means: you may compare characters using #== iff you are certain,
    that the characters ranges is 0..255. Otherwise, you HAVE TO compare
    using #=. (if in doubt, always compare using #=).
    Sorry for this inconvenience.

    Methods marked as (JS) come from the manchester Character goody
    (CharacterComparing) by Jan Steinman, which allow Characters to be used as
    Interval elements (i.e. ($a to:$z) do:[...] ); some of these have been
    modified a bit.

    WARNING: characters are known by compiler and runtime system -
	     do not change the instance layout. 

    Also, although you can create subclasses of Character, the compiler always
    creates instances of Character for literals ...
    ... and other classes are hard-wired ro always return instances of characters
    in some cases (i.e. String>>at:, Symbol>>at: etc.).
    Therefore, it may not make sense to create a character-subclass.
"
! !

!Character class methodsFor:'instance creation'!

basicNew
    "catch new - Characters cannot be created with new"

    ^ self error:'Characters cannot be created with new'
!

digitValue:anInteger
    "return a character that corresponds to anInteger.
     0-9 map to $0-$9, 10-35 map to $A-$Z"

    |val "{ Class: SmallInteger }" |

    val := anInteger.
    (val between:0 and:9) ifTrue:[
	^ Character value:(val + ($0 asciiValue))
    ].
    (val between:10 and:35) ifTrue:[
	^ Character value:(val + ($A asciiValue - 10))
    ].
    ^self error:'value not in range 0 to 35'
!

value:anInteger
    "return a character with asciivalue anInteger"

%{  /* NOCONTEXT */

    INT ascii;

    if (__isSmallInteger(anInteger)) {
	ascii = __intVal(anInteger);
	if ((ascii & ~0xFF) == 0 /* (ascii >= 0) && (ascii <= 255) */)
	    RETURN ( _MKCHARACTER(ascii) );
    }
%}
.
    (anInteger between:0 and:16rFF) ifTrue:[
	^ CharacterTable at:(anInteger + 1)
    ].
    (anInteger between:16r100 and:16rFFFF) ifTrue:[
	^ super basicNew setAsciiValue:anInteger
    ].
    "
     a characters ascii-code must be 0..16rFFFF.
     (i.e. only single-byte and twoByte characters are allowed.)
    "
    self error:'invalid ascii code for character'
! !

!Character class methodsFor:'constants'!

backspace
    "return the backspace character"

    ^ Character value:8
!

bell
    "return the bell character"

    ^ Character value:7
!

cr
    "return the lineEnd character 
     - actually (in unix) this is a newline character"

    ^ Character value:10
!

del 
    "return the delete character"

    ^ Character value:16r7F 
!

doubleQuote
    "return the double-quote character"

    ^ Character value:34
!

esc
    "return the escape character"

    ^ Character value:27
!

excla
    "return the exclamation-mark character"
    ^ $!!
!

ff
    "return the form-feed character"

    ^ Character value:12
!

lf
    "return the newline/linefeed character"

    ^ Character value:10
!

maxValue 
    "return the maximum asciiValue a character can have"

    ^ 16rFFFF
!

newPage
    "return the form-feed character"

    ^ Character value:12
!

nl
    "return the newline character"

    ^ Character value:10
!

quote
    "return the single-quote character"

    ^ Character value:39
!

return 
    "return the return character.
     In ST/X, this is different from cr - for Unix reasons."

    ^ Character value:13 
!

space
    "return the blank character"

    ^ Character value:32
!

tab
    "return the tabulator character"

    ^ Character value:9
! !

!Character class methodsFor:'primitive input'!

fromUser
    "return a character from the keyboard (C's standard input stream)
     - this should only be used for emergency evaluators and the like."

%{  /* NOCONTEXT */
    int c;

    c = getchar();
    if (c < 0) {
	RETURN (nil);
    }
    RETURN ( _MKCHARACTER(c & 0xFF) );
%}
! !

!Character class methodsFor:'queries'!

isBuiltInClass
    "return true, if this classes instance layout cannot be changed
     - this class is known by the run-time-system (but not subclasses)"

    ^ self == Character
! !

!Character methodsFor:'accessing'!

asciiValue
    "return the asciivalue of myself.
     PP has removed this methhod with 4.1 and providing
     asInteger instead."

    ^asciivalue
!

instVarAt:index put:anObject
    "catch instvar access - asciivalue may not be changed"

    self error:'Characters may not be modified'
! !

!Character methodsFor:'arithmetic'!

+ aMagnitude
    "Return the Character that is <aMagnitude> higher than the receiver. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue + aMagnitude asInteger \\ 256)
!

- aMagnitude
    "Return the Character that is <aMagnitude> lower than the receiver.  
     Wrap if the resulting value is not a legal Character value. (JS)
     claus: modified to return the difference as integer, if the argument
	    is another character"

    aMagnitude isCharacter ifTrue:[
	^ self asciiValue - aMagnitude asciiValue
    ].
    ^ Character value:(asciivalue - aMagnitude asInteger \\ 256)

    "
     $z - $a  
     $d - 3
    "
!

// aMagnitude
    "Return the Character who's value is the receiver divided by <aMagnitude>. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue // aMagnitude asInteger \\ 256)
!

\\ aMagnitude
    "Return the Character who's value is the receiver modulo <aMagnitude>.  
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue \\ aMagnitude asInteger \\ 256)
! !

!Character methodsFor:'binary storage'!

hasSpecialBinaryRepresentation
    "return true, if the receiver has a special binary representation"

    ^ true
!

storeBinaryOn:stream manager:manager
    "store a binary representation of the receiver on stream;
     redefined, since single-byte characters are stored more compact
     with a special type-code followed by the asciiValue."

    (asciivalue < 256) ifTrue:[
	stream nextPut:manager codeForCharacter.
	stream nextPut:asciivalue
    ] ifFalse:[
	stream nextPut:manager codeForTwoByteCharacter.
	stream nextPutShort:asciivalue MSB:true
    ]
! !

!Character methodsFor:'comparing'!

< aCharacter
    "return true, if the arguments asciiValue is greater than mine"

    ^ (asciivalue < aCharacter asciiValue)
!

<= aCharacter
    "return true, if the arguments asciiValue is greater or equal to mine"

    ^ (asciivalue <= aCharacter asciiValue)
!

= aCharacter
    "return true, if the argument, aCharacter is the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ true].
    aCharacter isCharacter ifFalse:[^ false].
    ^ (asciivalue = aCharacter asciiValue)
!

> aCharacter
    "return true, if the arguments asciiValue is less than mine"

    ^ (asciivalue > aCharacter asciiValue)
!

>= aCharacter
    "return true, if the arguments asciiValue is less or equal to mine"

    ^ (asciivalue >= aCharacter asciiValue)
!

identityHash
    "return an integer useful for hashing on identity"

    asciivalue <= 255 ifTrue:[
	^ 4096 + asciivalue
    ].
    ^ super identityHash
!

sameAs:aCharacter
    "return true, if the argument, aCharacter is the same character,
     ignoring case differences."

    self == aCharacter ifTrue:[^ true].
    ^ self asLowercase = aCharacter asLowercase
!

~= aCharacter
    "return true, if the argument, aCharacter is not the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ false].
    aCharacter isCharacter ifFalse:[^ true].
    ^ (asciivalue ~~ aCharacter asciiValue)
! !

!Character methodsFor:'converting'!

asCharacter
    "usually sent to integers, but redefined here to allow integers
     and characters to be used commonly without a need for a test."

    ^ self

    "
     32 asCharacter  
    "
!

asInteger
    "return an Integer with my ascii-value.
     OWST4.2 compatibility (sigh)"

    ^ asciivalue
!

asLowercase
    "return a character with same letter as the receiver,
     but lowercase (the receiver if its lowercase or nonLetter)"

    self isUppercase ifFalse:[^ self].
    ^ Character value:(asciivalue + 32)
!

asString
    "return a string of len 1 with myself as contents"

"/
"/    |newString|
"/
"/    newString := String new:1.
"/    newString at:1 put:self.
"/    ^ newString
"/

%{  /* NOCONTEXT */
    char buffer[2];
    OBJ s;
    OBJ __MKSTRING_L();

    buffer[0] = (char) _intVal(_characterVal(self));
    buffer[1] = '\0';
    s = __MKSTRING_L(buffer, 1 COMMA_SND);
    if (s != nil) {
	RETURN (s);
    }
%}.
    "
     memory allocation (for the new string) failed.
     When we arrive here, there was no memory, even after a garbage collect.
     This means, that the VM wanted to get some more memory from the
     OS, which was not kind enough to give it.
     Bad luck - you should increase the swap space on your machine.
    "
    ^ ObjectMemory allocationFailureSignal raise.
!

asSymbol
    "return a unique symbol which prints like I print"

    ^ Symbol internCharacter:self
!

asUppercase
    "return a character with same letter as the receiver,
     but uppercase (the receiver if its uppercase or nonLetter)"

    self isLowercase ifFalse:[^ self].
    ^ Character value:(asciivalue - 32)
!

digitValue
    "return my digitValue for any base"

    |ascii "{ Class: SmallInteger }" |

    ascii := asciivalue.
    (ascii between:($0 asciiValue) and:($9 asciiValue)) ifTrue:[
	^ ascii - $0 asciiValue
    ].
    (ascii between:($a asciiValue) and:($z asciiValue)) ifTrue:[
	^ ascii + (10 - $a asciiValue)
    ]. 
    (ascii between:($A asciiValue) and:($Z asciiValue)) ifTrue:[
	^ ascii + (10 - $A asciiValue)
    ]. 

"remove error below for X3J20 conformance ... "
    self error:'bad character'.
" "
    ^ -1
!

to:aMagnitude
    "Return an Interval over the characters from the receiver to <aMagnitude>. 
     Wrap <aMagnitude> if it is not a legal Character value. (JS)"

    ^ Interval from:self to:(aMagnitude \\ 256)
! !

!Character methodsFor:'copying'!

copy
    "return a copy of myself
     reimplemented since characters are unique"

     ^ self
!

deepCopy
    "return a deep copy of myself
     reimplemented since characters are unique"

     ^ self
!

deepCopyUsing:aDictionary
    "return a deep copy of myself
     reimplemented since characters are unique"

     ^ self
!

shallowCopy
    "return a shallow copy of myself
     reimplemented since characters are unique"

     ^ self
!

simpleDeepCopy
    "return a deep copy of myself
     reimplemented since characters are unique"

     ^ self
! !

!Character methodsFor:'enumerating'!

to:stopCharacter do:aBlock
    "evaluate aBlock for each character in self .. stopCharacter.
     This is somewhat stupid, since it depends on the ascii encoding
     (370-users watch out :-)"

    |runChar|

    runChar := self.
    [runChar <= stopCharacter] whileTrue:[
	aBlock value:runChar.
	runChar := runChar + 1
    ]

    "
     ($a to:$z) do:[:char | char printNL]
     $a to:$z do:[:char | char printNL].
    "
! !

!Character methodsFor:'national testing'!

isNationalAlphaNumeric
    "return true, if the receiver is a letter in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetterOrDigit ifTrue:[^ true].
    ^ self asciiValue between:16rC0 and:16rFF
!

isNationalLetter
    "return true, if the receiver is a letter in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetter ifTrue:[^ true].
    ^ self asciiValue between:16rC0 and:16rFF
! !

!Character methodsFor:'printing & storing'!

displayString
    "return a string used when the receiver is to be displayed
     in an inspector kind-of-thing"

    ^ self storeString
!

isLiteral
    "return true, if the receiver can be used as a literal
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print myself on stdout"

%{  /* NOCONTEXT */

    putchar(_intVal(_INST(asciivalue)));
%}
!

printOn:aStream
    "print myself on aStream"

    aStream nextPut:self
!

printString
    "return a string to print me"

    ^ self asString
!

storeOn:aStream
    "store myself on aStream"

    |special|

    (asciivalue between:33 and:127) ifFalse:[
	(self == Character space) ifTrue:[
	    special := '(Character space)'
	] ifFalse:[
	    (self == Character cr) ifTrue:[
		special := '(Character cr)'.
	    ] ifFalse:[
		(self == Character tab) ifTrue:[
		    special := '(Character tab)'.
		]
	    ]
	].
	special notNil ifTrue:[
	    aStream nextPutAll:special.
	    ^ self
	].
	aStream nextPutAll:'(Character value:'; 
		nextPutAll:(asciivalue printString); nextPutAll:')'
    ] ifTrue:[
	aStream nextPut:$$; nextPut:self
    ]
! !

!Character methodsFor:'private accessing'!

setAsciiValue:anInteger
    "very private - set the ascii value. 
     - use this only for characters with codes > 16rFF.
     DANGER alert: funny things happen, if this is applied to
     one of the fixed-characters 0..255."

    asciivalue := anInteger
! !

!Character methodsFor:'testing'!

isAlphaNumeric
    "return true, if I am a letter or a digit
     - same as isAlphaNumeric for compatibility reasons."

    ^ self isLetterOrDigit
!

isCharacter
    "return true,  if the receiver is some kind of character"

    ^ true
!

isDigit
    "return true, if I am a digit (i.e. $0 .. $9)"

    ^ asciivalue between:($0 asciiValue) and:($9 asciiValue)
!

isDigitRadix:r
    "return true, if I am a digit of a base r number"

    (asciivalue < $0 asciiValue) ifTrue:[^ false]. 
    (r > 10) ifTrue:[
	(asciivalue between:($0 asciiValue) and:($9 asciiValue)) ifTrue:[
	    ^ true
	].
	((asciivalue - $a asciiValue) between:0 and:(r - 10)) ifTrue:[
	    ^ true
	].
	^ (asciivalue - $A asciiValue) between:0 and:(r - 10)
    ].
    (asciivalue - $0 asciiValue) < r ifTrue:[^ true].
    ^ false
!

isEndOfLineCharacter
    "return true if I am a line delimitting character"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
    if ((val == '\n')
     || (val == '\r')
     || (val == '\f')) {
	RETURN ( true );
    }
%}
.
    ^ false
!

isLetter
    "return true, if I am a letter 
     - use isNationalLetter, if you are interrested in those."

%{  /*NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
    RETURN ( (((val >= 'a') && (val <= 'z')) ||
	      ((val >= 'A') && (val <= 'Z'))) ? true : false );
%}
!

isLetterOrDigit
    "return true, if I am a letter or a digit
     - use isNationalAlphaNumeric, if you are interrested in those."

%{  /* NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
    if ((val >= 'a') && (val <= 'z')) {
	RETURN ( true );
    }
    if ((val >= 'A') && (val <= 'Z')) {
	RETURN ( true );
    }
    if ((val >= '0') && (val <= '9')) {
	RETURN ( true );
    }
    RETURN ( false );
%}
!

isLowercase
    "return true, if I am a lower-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
#ifndef OLD
    /* iso8859 puts national lower case characters at e0 .. ff */
    if ((val >= 0xE0) && (val <= 0xFF)) {
	RETURN(true);
    }
#endif
    RETURN ( ((val >= 'a') && (val <= 'z')) ? true : false );
%}
!

isPrintable
    "return true, if the receiver is a useful printable character
     (see fileBrowsers showFile:-method on how it can be used)"

    (asciivalue between:32 and:127) ifTrue:[^ true].
    asciivalue == 13 ifTrue:[^ true].
    asciivalue == 9 ifTrue:[^ true].
    asciivalue == 10 ifTrue:[^ true].
    ^ self isNationalLetter
!

isSeparator
    "return true if I am a space, cr, tab, nl, or newPage"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
    if (val <= ' ')
#endif
	if ((val == ' ')
	 || (val == '\n') 
	 || (val == '\t')
	 || (val == '\r')
	 || (val == '\f')) {
	    RETURN ( true );
	}
%}
.
    ^ false
!

isUppercase
    "return true, if I am an upper-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = _intVal(_INST(asciivalue));
#ifndef OLD
    /* iso8859 puts national upper case characters at c0 .. df */
    if ((val >= 0xC0) && (val <= 0xDF)) {
	RETURN(true);
    }
#endif
    RETURN ( ((val >= 'A') && (val <= 'Z')) ? true : false );
%}
!

isVowel
    "return true, if I am a vowel (lower- or uppercase)"

    (self == $a) ifTrue:[^ true].
    (self == $e) ifTrue:[^ true].
    (self == $i) ifTrue:[^ true].
    (self == $o) ifTrue:[^ true].
    (self == $u) ifTrue:[^ true].
    (self == $A) ifTrue:[^ true].
    (self == $E) ifTrue:[^ true].
    (self == $I) ifTrue:[^ true].
    (self == $O) ifTrue:[^ true].
    (self == $U) ifTrue:[^ true].
    ^ false
! !

!Character class methodsFor:'documentation'!

version
    ^' $Header: /cvs/stx/stx/libbasic/Character.st,v 1.27 1995-12-07 21:31:57 cg Exp $'
! !