Character.st
author Claus Gittinger <cg@exept.de>
Mon, 17 Dec 2001 09:37:32 +0100
changeset 6324 377c841da590
parent 6066 ca508f7658dc
child 6327 6f90155dd142
permissions -rw-r--r--
added #null

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"{ Package: 'stx:libbasic' }"

Magnitude subclass:#Character
	instanceVariableNames:'asciivalue'
	classVariableNames:'CharacterTable'
	poolDictionaries:''
	category:'Magnitude-General'
!

!Character class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    This class represents characters.
    Notice, that actual character objects are not used when characters
    are stored in strings, symbols or twoByteStrings; these only store
    a characters asciiValue for a more compact representation.
    The word 'asciiValue' is a historic leftover - actually, any integer
    code is allowed (i.e. characters are not limited to 8bit).

    Single byte Characters are unique; 
    i.e. for every asciiValue (0..255) there exists exactly one instance of 
    Character, which is shared (Character value:xxx checks for this, and returns 
    a reference to an existing instance).
    Other characters (i.e. asciivalue > 255) are not shared; i.e. these
    are created as required.

    This means: you may compare characters using #== ONLY IFF you are certain,
    that the characters ranges is 0..255. Otherwise, you HAVE TO compare
    using #=. (if in doubt, always compare using #=).
    Sorry for this inconvenience, but it is (practically) impossible to keep
    the possible maximum of 2^32 characters (Unicode) around, for that
    convenience alone.


    Methods marked as (JS) come from the manchester Character goody
    (CharacterComparing) by Jan Steinman, which allow Characters to be used as
    Interval elements (i.e. ($a to:$z) do:[...] ); 
    They are not a big deal, but convenient add-ons.
    Some of these have been modified a bit.

    WARNING: characters are known by compiler and runtime system -
	     do not change the instance layout. 

    Also, although you can create subclasses of Character, the compiler always
    creates instances of Character for literals ...
    ... and other classes are hard-wired to always return instances of characters
    in some cases (i.e. String>>at:, Symbol>>at: etc.).
    Therefore, it may not make sense to create a character-subclass.

    [author:]
	Claus Gittinger

    [see also:]
	String TwoByteString
	StringCollection
"
! !

!Character class methodsFor:'instance creation'!

basicNew
    "catch new - Characters cannot be created with new"

    ^ self error:'Characters cannot be created with new'
!

digitValue:anInteger
    "return a character that corresponds to anInteger.
     0-9 map to $0-$9, 10-35 map to $A-$Z"

    |val "{ Class: SmallInteger }" |

    val := anInteger.
    (val between:0 and:9) ifTrue:[
	^ Character value:(val + ($0 asciiValue))
    ].
    (val between:10 and:35) ifTrue:[
	^ Character value:(val + ($A asciiValue - 10))
    ].
    ^self error:'value not in range 0 to 35'
!

value:anInteger
    "return a character with asciivalue anInteger"

%{  /* NOCONTEXT */

    INT ascii;

    if (__isSmallInteger(anInteger)) {
	ascii = __intVal(anInteger);
	if ((ascii & ~0xFF) == 0 /* (ascii >= 0) && (ascii <= 255) */)
	    RETURN ( __MKCHARACTER(ascii) );
    }
%}.
    (anInteger between:0 and:16rFF) ifTrue:[
	^ CharacterTable at:(anInteger + 1)
    ].
    (anInteger between:16r100 and:16rFFFF) ifTrue:[
	^ super basicNew setAsciiValue:anInteger
    ].
    "
     a characters ascii-code must be 0..16rFFFF.
     (i.e. only single-byte and twoByte characters are allowed.)
    "
    self error:'invalid ascii code for character'
! !

!Character class methodsFor:'Compatibility - ANSI'!

codePoint:asciiValue
    ^ self value:asciiValue
! !

!Character class methodsFor:'accessing untypeable characters'!

endOfInput
	"Answer the Character representing ctrl-d ."

	^self value: 4
!

leftParenthesis
	"Answer the Character representing a left parenthesis."

	^self value: 40
!

period
	"Answer the Character representing a carriage period."

	^self value: 46
!

poundSign
	"Answer the Character representing a pound sign."

	^self value: 35
!

rightParenthesis
	"Answer the Character representing a right parenthesis."

	^self value: 41
! !

!Character class methodsFor:'constants'!

backspace
    "return the backspace character"

    ^ Character value:8
!

bell
    "return the bell character"

    ^ Character value:7
!

cr
    "return the lineEnd character 
     - actually (in unix) this is a newline character"

    ^ Character value:10
!

del 
    "return the delete character"

    ^ Character value:16r7F 
!

doubleQuote
    "return the double-quote character"

    ^ Character value:34
!

esc
    "return the escape character"

    ^ Character value:27
!

excla
    "return the exclamation-mark character"
    ^ $!!
!

ff
    "return the form-feed character"

    ^ Character value:12
!

lf
    "return the newline/linefeed character"

    ^ Character value:10
!

linefeed
    "squeak compatibility: return the newline/linefeed character"

    ^ Character value:10
!

maxValue 
    "return the maximum asciiValue a character can have"

    ^ 16rFFFF
!

newPage
    "return the form-feed character"

    ^ Character value:12
!

nl
    "return the newline character"

    ^ Character value:10
!

null
    ^ Character value:0
!

quote
    "return the single-quote character"

    ^ Character value:39
!

return 
    "return the return character.
     In ST/X, this is different from cr - for Unix reasons."

    ^ Character value:13 
!

space
    "return the blank character"

    ^ Character value:32
!

tab
    "return the tabulator character"

    ^ Character value:9
! !

!Character class methodsFor:'primitive input'!

fromUser
    "return a character from the keyboard (C's standard input stream)
     - this should only be used for emergency evaluators and the like."

%{  /* NOCONTEXT */
    int c;

    c = getchar();
    if (c < 0) {
        RETURN (nil);
    }
    RETURN ( __MKCHARACTER(c & 0xFF) );
%}.
    ^ Stdin next
! !

!Character class methodsFor:'queries'!

allCharacters
    "added for squeak compatibility: return a collection of all chars"

    ^ CharacterTable
        
    "
     Character allCharacters 
    "
!

hasSharedInstances
    "return true if this class has shared instances, that is, instances
     with the same value are identical.
     False is returned here, only redefined in classes which have unified
     instances (or should be treated so)."

    ^ true


!

isBuiltInClass
    "return true if this class is known by the run-time-system.
     Here, true is returned for myself, false for subclasses."

    ^ self == Character

    "Modified: 23.4.1996 / 15:56:39 / cg"
!

separators
    "added for squeak compatibility: return a collection of separator chars"

    ^ Array 
	with:Character space
	with:Character cr
	with:Character tab
	with:Character lf
	with:Character ff
        
    "
     Character separators
    "
! !

!Character methodsFor:'Compatibility - ANSI'!

codePoint
    ^ self asciiValue
! !

!Character methodsFor:'Compatibility - Dolphin'!

isControl
    ^ self asciiValue <= 16r20
!

isHexDigit
    ^ '0123456789abcdefABCDEF' includes:self
!

isPunctuation
    ^ '.,' includes:self
! !

!Character methodsFor:'accessing'!

asciiValue
    "return the asciivalue of myself.
     The name 'asciiValue' is a historic leftover - characters are not
     limited to 8bit characters.
     PP has removed this methhod with 4.1 and providing
     asInteger instead."

    ^asciivalue

    "Modified: 27.6.1996 / 12:34:34 / cg"
!

instVarAt:index put:anObject
    "catch instvar access - asciivalue may not be changed"

    self error:'Characters may not be modified'
!

newDigitValue
	"Answer 0-9 if the receiver is $0-$9, 10-35 if it is $A-$Z, and < 0 otherwise. 
	This is used to parse literal numbers of radix 2-36."

	| code value |
	code := self asInteger.
	value := code - $0 asInteger.
	value <= 9 ifTrue: [^value].
	value := code - $A asInteger.
	(value >= 0 and: [value < 26])
		ifTrue: [^value + 10].
	^-1
! !

!Character methodsFor:'arithmetic'!

+ aMagnitude
    "Return the Character that is <aMagnitude> higher than the receiver. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue + aMagnitude asInteger \\ 256)

    "
     $A + 5
    "

    "Modified: 27.6.1996 / 12:34:51 / cg"
!

- aMagnitude
    "Return the Character that is <aMagnitude> lower than the receiver.  
     Wrap if the resulting value is not a legal Character value. (JS)
     claus: 
	modified to return the difference as integer, if the argument
	is another character. If the argument is a number, a character is
	returned."

    aMagnitude isCharacter ifTrue:[
	^ self asciiValue - aMagnitude asciiValue
    ].
    ^ Character value:(asciivalue - aMagnitude asInteger \\ 256)

    "
     $z - $a  
     $d - 3
    "

    "Modified: 27.6.1996 / 12:35:34 / cg"
!

// aMagnitude
    "Return the Character who's value is the receiver divided by <aMagnitude>. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue // aMagnitude asInteger \\ 256)
!

\\ aMagnitude
    "Return the Character who's value is the receiver modulo <aMagnitude>.  
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue \\ aMagnitude asInteger \\ 256)
! !

!Character methodsFor:'binary storage'!

hasSpecialBinaryRepresentation
    "return true, if the receiver has a special binary representation"

    ^ true
!

storeBinaryOn:stream manager:manager
    "store a binary representation of the receiver on stream;
     redefined, since single-byte characters are stored more compact
     with a special type-code followed by the asciiValue."

    (asciivalue < 256) ifTrue:[
	stream nextPut:manager codeForCharacter; nextPut:asciivalue.
    ] ifFalse:[
	stream nextPut:manager codeForTwoByteCharacter.
	stream nextPutShort:asciivalue MSB:true
    ]
! !

!Character methodsFor:'comparing'!

< aCharacter
    "return true, if the arguments asciiValue is greater than mine"

    ^ (asciivalue < aCharacter asInteger)
!

<= aCharacter
    "return true, if the arguments asciiValue is greater or equal to mine"

    ^ (asciivalue <= aCharacter asInteger)
!

= aCharacter
    "return true, if the argument, aCharacter is the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ true].
    aCharacter isCharacter ifFalse:[^ false].
    ^ (asciivalue = aCharacter asciiValue)
!

> aCharacter
    "return true, if the arguments asciiValue is less than mine"

    ^ (asciivalue > aCharacter asInteger)
!

>= aCharacter
    "return true, if the arguments asciiValue is less or equal to mine"

    ^ (asciivalue >= aCharacter asInteger)
!

hash
    "return an integer useful for hashing"

    ^ asciivalue
!

identityHash
    "return an integer useful for hashing on identity"

    asciivalue <= 255 ifTrue:[
	^ 4096 + asciivalue
    ].
    ^ super identityHash
!

sameAs:aCharacter
    "return true, if the argument, aCharacter is the same character,
     ignoring case differences."

    self == aCharacter ifTrue:[^ true].
    ^ self asLowercase = aCharacter asLowercase
!

~= aCharacter
    "return true, if the argument, aCharacter is not the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ false].
    aCharacter isCharacter ifFalse:[^ true].
    ^ (asciivalue ~~ aCharacter asciiValue)
! !

!Character methodsFor:'converting'!

asCharacter
    "usually sent to integers, but redefined here to allow integers
     and characters to be used commonly without a need for a test."

    ^ self

    "
     32 asCharacter  
    "
!

asInteger
    "return an Integer with my ascii-value.
     OWST4.2 compatibility (sigh)"

    ^ asciivalue
!

asLowercase
    "return a character with same letter as the receiver,
     but lowercase (the receiver if its lowercase or nonLetter)"

    |code "{Class: SmallInteger }"|

"/ the old code:
"/    self isUppercase ifFalse:[^ self].
"/    ^ Character value:(asciivalue + 32)

    code := asciivalue.

    "/ ISO Latin-1
    ((code >= $A asciiValue) and:[code <= $Z asciiValue]) ifTrue:[
	^ Character value:(code + ($a asciiValue - $A asciiValue))
    ].
    code < 16r00C0 ifTrue:[^ self].
    code < 16r0100 ifTrue:[
	code >= 16r00DF ifTrue:[^ self].
	code == 16r00D7 ifTrue:[^ self].
	^ Character value:(code + 16r20)
    ].

    "/ mhmh - in which encoding is this character.
    "/ here, assume Unicode

    'Character [info]: Unicode support is under construction' infoPrintCR.
    ^ self

    "
     $A asLowercase 
     $1 asLowercase  
    "

    "Modified: 10.1.1997 / 15:35:44 / cg"
!

asString
    "return a string of len 1 with myself as contents"

%{  /* NOCONTEXT */
    char buffer[2];
    OBJ s;
    unsigned val;

    val = __intVal(_characterVal(self));
    if (val <= 0xFF) {
        buffer[0] = (char) val;
        buffer[1] = '\0';
        s = __MKSTRING_L(buffer, 1 COMMA_SND);
        if (s != nil) {
            RETURN (s);
        }
    }
%}.
    asciivalue > 255 ifTrue:[
        ^ (TwoByteString new:1) at:1 put:self; yourself
    ].


    ^ (String new:1) at:1 put:self; yourself.
!

asSymbol
    "return a unique symbol which prints like I print"

    ^ Symbol internCharacter:self
!

asUnicodeString
    "return a unicode string of len 1 with myself as contents"

    ^ (UnicodeString new:1) at:1 put:self; yourself.
!

asUppercase
    "return a character with same letter as the receiver,
     but uppercase (the receiver if its uppercase or nonLetter)"

    |code "{Class: SmallInteger }"|

"/ the old code:
"/    self isLowercase ifFalse:[^ self].
"/    ^ Character value:(asciivalue - 32)

    code := asciivalue.

    "/ ISO Latin-1
    ((code >= $a asciiValue) and:[code <= $z asciiValue]) ifTrue:[
	^ Character value:(code + ($A asciiValue - $a asciiValue))
    ].
    code < 16r00E0 ifTrue:[^ self].
    code < 16r0100 ifTrue:[
	code == 16r00F7 ifTrue:[^ self]. "/ division
"/        code == 16r00FF ifTrue:[^ Character value:16r0178].  "/ y diaresis (no uppercase equivalent in ISO-latin 1
	code == 16r00FF ifTrue:[^ self]. 

	^ Character value:(code - 16r20)
    ].

    "/ mhmh - in which encoding is this character.
    "/ here, assume Unicode

    'Character [info]: Unicode support is under construction' infoPrintCR.
    ^ self

    "
     $A asLowercase 
     $1 asLowercase  
    "

    "Modified: 10.1.1997 / 15:35:51 / cg"
!

decodeAsLiteralArray
    "given a literalEncoding in the receiver,
     create & return the corresponding object.
     The inverse operation to #literalArrayEncoding."

    ^ self

    "Created: / 27.10.1997 / 14:40:16 / cg"
!

digitValue
    "return my digitValue for any base"

    |ascii "{ Class: SmallInteger }" |

    ascii := asciivalue.
    (ascii between:($0 asciiValue) and:($9 asciiValue)) ifTrue:[
	^ ascii - $0 asciiValue
    ].
    (ascii between:($a asciiValue) and:($z asciiValue)) ifTrue:[
	^ ascii + (10 - $a asciiValue)
    ]. 
    (ascii between:($A asciiValue) and:($Z asciiValue)) ifTrue:[
	^ ascii + (10 - $A asciiValue)
    ]. 

"remove error below for X3J20 conformance ... "
    self error:'bad character'.
" "
    ^ -1
!

literalArrayEncoding
    "encode myself as an array literal, from which a copy of the receiver
     can be reconstructed with #decodeAsLiteralArray."

    ^ self

    "Created: / 27.10.1997 / 14:40:37 / cg"
!

to:aMagnitude
    "Return an Interval over the characters from the receiver to <aMagnitude>. 
     Wrap <aMagnitude> if it is not a legal Character value. (JS)"

    ^ Interval from:self to:(aMagnitude \\ 256)
!

utf8Encoded
    "convert a character from ISO8859-1 to its UTF-8 encoding.
     this returns a String"

    |s|

    s := (String new:6) writeStream.
    self utf8EncodedOn:s.
    ^ s contents
!

utf8EncodedOn:aStream
    "convert a character from ISO8859-1 to its UTF-8 encoding 
     and store it onto aStream.
     Up to 31 bits can be encoded in up to 6 bytes"

    |b1 b2 b3 b4 b5 v|

    asciivalue <= 16r7F ifTrue:[
        aStream nextPut:self.
        ^ self.
    ].

    b1 := Character value:((asciivalue bitAnd:16r3F) bitOr:16r80).
    v := asciivalue bitShift:-6.
    v <= 16r1F ifTrue:[
        aStream nextPut:(Character value:(v bitOr:16rC0)).
        aStream nextPut:b1.
        ^ self.
    ].
    b2 := Character value:((v bitAnd:16r3F) bitOr:16r80).
    v := v bitShift:-6.
    v <= 16r0F ifTrue:[
        aStream nextPut:(Character value:(v bitOr:16rC8)).
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b3 := Character value:((v bitAnd:16r3F) bitOr:16r80).
    v := v bitShift:-6.
    v <= 16r07 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:16rCC)).
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b4 := Character value:((v bitAnd:16r3F) bitOr:16r80).
    v := v bitShift:-6.
    v <= 16r03 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:16rF0)).
        aStream nextPut:b4.
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b5 := Character value:((v bitAnd:16r3F) bitOr:16r80).
    v := v bitShift:-6.
    v <= 16r01 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:16rF8)).
        aStream nextPut:b5.
        aStream nextPut:b4.
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].

        
    "
      $a utf8Encoded
      $ü utf8Encoded
    "
! !

!Character methodsFor:'copying'!

copy
    "return a copy of myself
     reimplemented since characters are unique"

     ^ self
!

copyUpToLast: char 

	^self
!

deepCopyUsing:aDictionary
    "return a deep copy of myself
     reimplemented since characters are immutable"

     ^ self
!

shallowCopy
    "return a shallow copy of myself
     reimplemented since characters are immutable"

     ^ self
!

simpleDeepCopy
    "return a deep copy of myself
     reimplemented since characters are immutable"

     ^ self
! !

!Character methodsFor:'encoding'!

encodeOn:anEncoder with:aParameter

    anEncoder encodeCharacter:self with:aParameter


! !

!Character methodsFor:'national testing'!

isNationalAlphaNumeric
    "return true, if the receiver is a letter or digit in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetterOrDigit ifTrue:[^ true].
    ^ self isNationalLetter

    "Modified: 21.1.1997 / 11:10:54 / cg"
!

isNationalLetter
    "return true, if the receiver is a letter in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetter ifTrue:[^ true].
    (asciivalue between:16rC0 and:16rD6) ifTrue:[^ true].
    (asciivalue between:16rD8 and:16rF6) ifTrue:[^ true].
    (asciivalue between:16rF8 and:16rFF) ifTrue:[^ true].
    ^ false.

    "Modified: 21.1.1997 / 11:13:02 / cg"
! !

!Character methodsFor:'printing & storing'!

displayString
    "return a string used when the receiver is to be displayed
     in an inspector kind-of-thing"

    ^ (self storeString , ' "16r' , (asciivalue printStringRadix:16) , '"')
!

isLiteral
    "return true, if the receiver can be used as a literal constant in ST syntax
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print myself on stdout.
     This method does NOT (by purpose) use the stream classes and 
     will therefore work even in case of emergency (but only, if Stdout is nil)."

%{  /* NOCONTEXT */

    if (@global(Stdout) == nil) {
        putchar(__intVal(__INST(asciivalue)));
        RETURN(self);
    }
%}.
    super print
!

printOn:aStream
    "print myself on aStream"

    aStream nextPut:self
!

printString
    "return a string to print me"

    ^ self asString
!

reconstructOn:aStream
    aStream nextPut:$$; nextPut:self

    "Created: 6.2.1996 / 09:46:12 / stefan"
    "Modified: 6.2.1996 / 10:57:35 / stefan"
!

storeOn:aStream
    "store myself on aStream"

    |special|

    (asciivalue between:33 and:127) ifFalse:[
	(self == Character space) ifTrue:[
	    special := 'space'
	] ifFalse:[
	    (self == Character cr) ifTrue:[
		special := 'cr'.
	    ] ifFalse:[
		(self == Character tab) ifTrue:[
		    special := 'tab'.
		] ifFalse:[
		    (self == Character esc) ifTrue:[
			special := 'esc'.
		    ]
		]
	    ]
	].
	special notNil ifTrue:[
	    aStream nextPutAll:'(Character '; nextPutAll:special; nextPut:$).
	    ^ self
	].
	aStream nextPutAll:'(Character value:16r'.
	asciivalue printOn:aStream base:16.
	aStream nextPut:$)
    ] ifTrue:[
	aStream nextPut:$$; nextPut:self
    ]

    "Modified: / 23.2.1996 / 23:27:32 / cg"
    "Modified: / 20.1.1998 / 14:10:46 / stefan"
! !

!Character methodsFor:'private accessing'!

setAsciiValue:anInteger
    "very private - set the ascii value. 
     - use this only for characters with codes > 16rFF.
     DANGER alert: funny things happen, if this is applied to
     one of the fixed-characters 0..255."

    asciivalue := anInteger
! !

!Character methodsFor:'testing'!

isAlphaNumeric
    "return true, if I am a letter or a digit
     - same as isAlphaNumeric for compatibility reasons."

    ^ self isLetterOrDigit
!

isCharacter
    "return true,  if the receiver is some kind of character"

    ^ true
!

isControlCharacter
    "return true if I am a control character (i.e. ascii value < 32)"

%{  /* NOCONTEXT */
#ifdef NON_ASCII       /* i.e. EBCDIC ;-) */
    not yet implemented - fails when compiled
#else
    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if (val < ' ') {
        RETURN ( true );
    }
#endif
    RETURN (false);
%}.
    ^ asciivalue < 16r20.

    "
     (Character value:1) isControlCharacter
     $a isControlCharacter
    "
!

isDigit
    "return true, if I am a digit (i.e. $0 .. $9)"

    ^ asciivalue between:($0 asciiValue) and:($9 asciiValue)
!

isDigitRadix:r
    "return true, if I am a digit of a base r number"

    (asciivalue < $0 asciiValue) ifTrue:[^ false]. 
    (r > 10) ifTrue:[
	(asciivalue between:($0 asciiValue) and:($9 asciiValue)) ifTrue:[
	    ^ true
	].
	((asciivalue - $a asciiValue) between:0 and:(r - 11)) ifTrue:[
	    ^ true
	].
	^ (asciivalue - $A asciiValue) between:0 and:(r - 11)
    ].
    (asciivalue - $0 asciiValue) < r ifTrue:[^ true].
    ^ false
!

isEndOfLineCharacter
    "return true if I am a line delimitting character"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((val == '\n')
     || (val == '\r')
     || (val == '\f')) {
        RETURN ( true );
    }
    RETURN (false);
%}.
    ^ asciivalue == 16r0A
      or:[asciivalue == 16r0D
      or:[asciivalue == 16r0C]]

!

isImmediate
    "return true if I am an immediate object
     i.e. I am represented in the pointer itself and
     no real object header/storage is used me.
     For VW compatibility, shared characters (i.e. in the range 0..255) 
     also return true here"

    ^ asciivalue <= 255

!

isLetter
    "return true, if I am a letter 
     - use isNationalLetter, if you are interested in those."

%{  /*NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    RETURN ( (((val >= 'a') && (val <= 'z')) ||
              ((val >= 'A') && (val <= 'Z'))) ? true : false );
%}.
    ^ (asciivalue between:($a asciiValue) and:($z asciiValue))
      or:[(asciivalue between:($A asciiValue) and:($Z asciiValue))]
!

isLetterOrDigit
    "return true, if I am a letter or a digit
     - use isNationalAlphaNumeric, if you are interested in those."

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((val >= 'a') && (val <= 'z')) {
        RETURN ( true );
    }
    if ((val >= 'A') && (val <= 'Z')) {
        RETURN ( true );
    }
    if ((val >= '0') && (val <= '9')) {
        RETURN ( true );
    }
    RETURN ( false );
%}.
    ^ self isLetter or:[self isDigit]

!

isLowercase
    "return true, if I am a lower-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
#ifndef OLD
    /* iso8859 puts national lower case characters at e0 .. ff */
    if ((val >= 0xE0) && (val <= 0xFF)) {
        RETURN(true);
    }
#endif
    RETURN ( ((val >= 'a') && (val <= 'z')) ? true : false );
%}.
    ^ (asciivalue between:($a asciiValue) and:($z asciiValue))
      or:[asciivalue between:16rE0 and:16rFF]
!

isPrintable
    "return true, if the receiver is a useful printable character
     (see fileBrowsers showFile:-method on how it can be used)"

    (asciivalue between:32 and:127) ifTrue:[^ true].
    asciivalue == 13 ifTrue:[^ true].
    asciivalue == 9 ifTrue:[^ true].
    asciivalue == 10 ifTrue:[^ true].

    (asciivalue between:16rA0 and:16rBF) ifTrue:[^ true]. "/ ISO8859
    ^ self isNationalLetter

    "Modified: 7.8.1997 / 17:05:24 / cg"
!

isSeparator
    "return true if I am a space, cr, tab, nl, or newPage"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
    if (val <= ' ')
#endif
        if ((val == ' ')
         || (val == '\n') 
         || (val == '\t')
         || (val == '\r')
         || (val == '\f')) {
            RETURN ( true );
        }
    RETURN (false);
%}.                                  
    ^ (asciivalue == 16r20)
      or:[asciivalue == 16r0D
      or:[asciivalue == 16r0A
      or:[asciivalue == 16r09
      or:[asciivalue == 16r0C]]]]

!

isUppercase
    "return true, if I am an upper-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
#ifndef OLD
    /* iso8859 puts national upper case characters at c0 .. df */
    if ((val >= 0xC0) && (val <= 0xDF)) {
        RETURN(true);
    }
#endif
    RETURN ( ((val >= 'A') && (val <= 'Z')) ? true : false );
%}.
    ^ (asciivalue between:($A asciiValue) and:($Z asciiValue))
      or:[asciivalue between:16rC0 and:16rDF]

!

isVowel
    "return true, if I am a vowel (lower- or uppercase)"

    "/ I know the code is ugly;
    "/ better code is:
    "/     'aeiou' includes:self asLowercase
    "/ or:
    "/     'aeiouAEIOU' includes:self
    "/
    "/ until I have a smart compiler, I use the shorter (codewise):

    (self == $a) ifTrue:[^ true].
    (self == $e) ifTrue:[^ true].
    (self == $i) ifTrue:[^ true].
    (self == $o) ifTrue:[^ true].
    (self == $u) ifTrue:[^ true].
    (self == $A) ifTrue:[^ true].
    (self == $E) ifTrue:[^ true].
    (self == $I) ifTrue:[^ true].
    (self == $O) ifTrue:[^ true].
    (self == $U) ifTrue:[^ true].
    ^ false
! !

!Character methodsFor:'tracing'!

traceInto:aRequestor level:level from:referrer
    "double dispatch into tracer, passing my type implicitely in the selector"

    ^ aRequestor traceCharacter:self level:level from:referrer

! !

!Character class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.71 2001-12-17 08:37:32 cg Exp $'
! !