Character.st
author Claus Gittinger <cg@exept.de>
Fri, 13 Jun 2003 20:52:21 +0200
changeset 7354 b22823ab2232
parent 7353 a5227413a23a
child 7688 8494a4155037
permissions -rw-r--r--
comment

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"{ Package: 'stx:libbasic' }"

Magnitude subclass:#Character
	instanceVariableNames:'asciivalue'
	classVariableNames:'CharacterTable'
	poolDictionaries:''
	category:'Magnitude-General'
!

!Character class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    This class represents characters.
    Notice, that actual character objects are not used when characters
    are stored in strings, symbols or twoByteStrings; these only store
    a characters asciiValue for a more compact representation.
    The word 'asciiValue' is a historic leftover - actually, any integer
    code is allowed (i.e. characters are not limited to 8bit).

    Single byte Characters are unique; 
    i.e. for every asciiValue (0..255) there exists exactly one instance of 
    Character, which is shared (Character value:xxx checks for this, and returns 
    a reference to an existing instance).
    Other characters (i.e. asciivalue > 255) are not shared; i.e. these
    are created as required.

    This means: you may compare characters using #== ONLY IFF you are certain,
    that the characters ranges is 0..255. Otherwise, you HAVE TO compare
    using #=. (if in doubt, always compare using #=).
    Sorry for this inconvenience, but it is (practically) impossible to keep
    the possible maximum of 2^32 characters (Unicode) around, for that
    convenience alone.


    Methods marked as (JS) come from the manchester Character goody
    (CharacterComparing) by Jan Steinman, which allow Characters to be used as
    Interval elements (i.e. ($a to:$z) do:[...] ); 
    They are not a big deal, but convenient add-ons.
    Some of these have been modified a bit.

    WARNING: characters are known by compiler and runtime system -
	     do not change the instance layout. 

    Also, although you can create subclasses of Character, the compiler always
    creates instances of Character for literals ...
    ... and other classes are hard-wired to always return instances of characters
    in some cases (i.e. String>>at:, Symbol>>at: etc.).
    Therefore, it may not make sense to create a character-subclass.

    [author:]
	Claus Gittinger

    [see also:]
	String TwoByteString
	StringCollection
"
! !

!Character class methodsFor:'instance creation'!

basicNew
    "catch new - Characters cannot be created with new"

    ^ self error:'Characters cannot be created with new'
!

digitValue:anInteger
    "return a character that corresponds to anInteger.
     0-9 map to $0-$9, 10-35 map to $A-$Z"

    |val "{ Class: SmallInteger }" |

    val := anInteger.
    (val between:0 and:9) ifTrue:[
	^ Character value:(val + ($0 asciiValue))
    ].
    (val between:10 and:35) ifTrue:[
	^ Character value:(val + ($A asciiValue - 10))
    ].
    ^self error:'value not in range 0 to 35'
!

utf8DecodeFrom:aStream
    "read and return a single unicode character from an UTF8 encoded stream"

    |fetchNext c1 c2 c3 c4 c5 ascii|

    c1 := aStream next.
    ascii := c1 asciiValue.
    ascii <= 16r7F ifTrue:[
        "/ 0xxxxxxx - 7 bits   
        ^ c1.
    ].

    (ascii bitAnd:2r11000000) == 2r10000000 ifTrue:[
        "/ out of sync (got an intermediate character)
        self error:'out of sync'.
        ^ c1.
    ].

    fetchNext := [  |ch|

                    ch := aStream next.
                    (ch asciiValue bitAnd:2r11000000) == 2r10000000 ifFalse:[
                        "/ followup chars must have 2r10 in high bits
                        self error:'bad encoding'.
                    ].
                    ch
                 ].

    (ascii bitAnd:2r11100000) == 2r11000000 ifTrue:[
        "/ 110xxxxx 10xxxxxx - 11 bits
        c2 := fetchNext value.
        ascii := c1 asciiValue bitAnd:16r1F.
        ascii := (ascii bitShift:6) bitOr:(c2 asciiValue bitAnd:16r3F).
        ascii <= 16r7F ifTrue:[
            self error:'invalid encoding'.
        ].
        ^ Character value:ascii        
    ].
    (ascii bitAnd:2r11110000) == 2r11100000 ifTrue:[
        "/ 1110xxxx 10xxxxxx 10xxxxxx - 16 bits
        c2 := fetchNext value.
        c3 := fetchNext value.
        ascii := c1 asciiValue bitAnd:16r0F.
        ascii := (ascii bitShift:6) bitOr:(c2 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c3 asciiValue bitAnd:16r3F).
        ascii <= 16r7FF ifTrue:[
            self error:'invalid encoding'.
        ].
        ^ Character value:ascii        
    ].

    "/ notice: currently, characters can only have 16bit encoding;
    "/ therefore the following will raise a runtime exception,

    (ascii bitAnd:2r11111000) == 2r11110000 ifTrue:[
        "/ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - 21 bits
        c2 := fetchNext value.
        c3 := fetchNext value.
        c4 := fetchNext value.
        ascii := c1 asciiValue bitAnd:16r07.
        ascii := (ascii bitShift:6) bitOr:(c2 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c3 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c4 asciiValue bitAnd:16r3F).
        ascii <= 16rFFFF ifTrue:[
            self error:'invalid encoding'.
        ].
        ^ Character value:ascii        
    ].

    (ascii bitAnd:2r11111100) == 2r11111000 ifTrue:[
        "/ 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - 26 bits
        c2 := fetchNext value.
        c3 := fetchNext value.
        c4 := fetchNext value.
        c5 := fetchNext value.
        ascii := c1 asciiValue bitAnd:16r03.
        ascii := (ascii bitShift:6) bitOr:(c2 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c3 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c4 asciiValue bitAnd:16r3F).
        ascii := (ascii bitShift:6) bitOr:(c5 asciiValue bitAnd:16r3F).
        ascii <= 16r1FFFFF ifTrue:[
            self error:'invalid encoding'.
        ].
        ^ Character value:ascii        
    ].

    (ascii bitAnd:2r11111110) == 2r11111100 ifTrue:[
        "/ 1111110x ... 10xxxxxx - any number of bits
        ascii := c1 asciiValue bitAnd:16r01.

        c2 := aStream peek.
        [ (c2 asciiValue bitAnd:2r11000000) == 2r10000000 ] whileTrue:[
            ascii := (ascii bitShift:6) bitOr:(c2 asciiValue bitAnd:16r3F).
            aStream next.
        ].
        ascii <= 16r3FFFFFF ifTrue:[
            self error:'invalid encoding'.
        ].
        ^ Character value:ascii        
    ].

    self error:'bad encoding'.
    ^ c1

    "
      Character utf8DecodeFrom:'a' readStream     
      Character utf8DecodeFrom:#[195 188] asString readStream   
    "                                            

    "test:

      |utf8Encoding original readBack|

      1 to:16rFFFF do:[:ascii |
        original := Character value:ascii.
        utf8Encoding := original asString utf8Encoded.
        readBack := Character utf8DecodeFrom:(utf8Encoding readStream).
        readBack asciiValue = ascii ifFalse:[
            self halt
        ]
      ]
    "
!

value:anInteger
    "return a character with asciivalue anInteger"

%{  /* NOCONTEXT */

    INT ascii;

    if (__isSmallInteger(anInteger)) {
	ascii = __intVal(anInteger);
	if ((ascii & ~0xFF) == 0 /* (ascii >= 0) && (ascii <= 255) */)
	    RETURN ( __MKCHARACTER(ascii) );
    }
%}.
    (anInteger between:0 and:16rFF) ifTrue:[
	^ CharacterTable at:(anInteger + 1)
    ].
    (anInteger between:16r100 and:16rFFFF) ifTrue:[
	^ super basicNew setAsciiValue:anInteger
    ].
    "
     a characters ascii-code must be 0..16rFFFF.
     (i.e. only single-byte and twoByte characters are allowed.)
    "
    self error:'invalid ascii code for character'
! !

!Character class methodsFor:'Compatibility-ANSI'!

codePoint:asciiValue
    ^ self value:asciiValue
! !

!Character class methodsFor:'accessing untypeable characters'!

endOfInput
	"Answer the Character representing ctrl-d ."

	^self value: 4
!

leftParenthesis
	"Answer the Character representing a left parenthesis."

	^self value: 40
!

period
	"Answer the Character representing a carriage period."

	^self value: 46
!

poundSign
	"Answer the Character representing a pound sign."

	^self value: 35
!

rightParenthesis
	"Answer the Character representing a right parenthesis."

	^self value: 41
! !

!Character class methodsFor:'constants'!

backspace
    "return the backspace character"

    ^ Character value:8
!

bell
    "return the bell character"

    ^ Character value:7
!

cr
    "return the lineEnd character 
     - actually (in unix) this is a newline character"

    ^ Character value:10
!

del 
    "return the delete character"

    ^ Character value:16r7F 
!

doubleQuote
    "return the double-quote character"

    ^ Character value:34
!

esc
    "return the escape character"

    ^ Character value:27
!

excla
    "return the exclamation-mark character"
    ^ $!!
!

ff
    "return the form-feed character"

    ^ Character value:12
!

lf
    "return the newline/linefeed character"

    ^ Character value:10
!

linefeed
    "squeak compatibility: return the newline/linefeed character"

    ^ Character value:10
!

maxValue 
    "return the maximum asciiValue a character can have"

    ^ 16rFFFF
!

newPage
    "return the form-feed character"

    ^ Character value:12
!

nl
    "return the newline character"

    ^ Character value:10
!

null
    ^ Character value:0
!

quote
    "return the single-quote character"

    ^ Character value:39
!

return 
    "return the return character.
     In ST/X, this is different from cr - for Unix reasons."

    ^ Character value:13 
!

space
    "return the blank character"

    ^ Character value:32
!

tab
    "return the tabulator character"

    ^ Character value:9
! !

!Character class methodsFor:'primitive input'!

fromUser
    "return a character from the keyboard (C's standard input stream)
     - this should only be used for emergency evaluators and the like."

%{  /* NOCONTEXT */
    int c;

    c = getchar();
    if (c < 0) {
        RETURN (nil);
    }
    RETURN ( __MKCHARACTER(c & 0xFF) );
%}.
    ^ Stdin next
! !

!Character class methodsFor:'queries'!

allCharacters
    "added for squeak compatibility: return a collection of all chars"

    ^ CharacterTable
        
    "
     Character allCharacters 
    "
!

hasSharedInstances
    "return true if this class has shared instances, that is, instances
     with the same value are identical.
     False is returned here, only redefined in classes which have unified
     instances (or should be treated so)."

    ^ true


!

isBuiltInClass
    "return true if this class is known by the run-time-system.
     Here, true is returned for myself, false for subclasses."

    ^ self == Character

    "Modified: 23.4.1996 / 15:56:39 / cg"
!

separators
    "added for squeak compatibility: return a collection of separator chars"

    ^ Array 
        with:Character space
        with:Character return
        "/ with:Character cr
        with:Character tab
        with:Character lf
        with:Character ff
        
    "
     Character separators
    "
! !

!Character methodsFor:'Compatibility-ANSI'!

codePoint
    ^ self asciiValue
! !

!Character methodsFor:'Compatibility-Dolphin'!

isAlphaNumeric
    "return true, if I am a letter or a digit
     Please use isLetterOrDigit for compatibility reasons (which is ANSI)."

    ^ self isLetterOrDigit
!

isAlphabetic
    "return true, if I am a letter.
     Please use isLetter for compatibility reasons (which is ANSI)."

    ^ self isLetter
!

isControl
    "return true if I am a control character (i.e. ascii value < 32)"

    ^ self isControlCharacter
!

isHexDigit
    "return true if I am a valid hexadecimal digit"

    ^ '0123456789abcdefABCDEF' includes:self

    "
     $a isHexDigit
    "
!

isPunctuation
    ^ (asciivalue between:16r21 and:16r40)
      or:[ (asciivalue between:16r5B and:16r60)
      or:[ (asciivalue between:123 and:126)
      or:[ (asciivalue between:161 and:191)
      or:[ (asciivalue == 215 )
      or:[ (asciivalue == 247 ) ]]]]]
! !

!Character methodsFor:'accessing'!

asciiValue
    "return the asciivalue of myself.
     The name 'asciiValue' is a historic leftover - characters are not
     limited to 8bit characters.
     PP has removed this methhod with 4.1 and providing
     asInteger instead."

    ^asciivalue

    "Modified: 27.6.1996 / 12:34:34 / cg"
!

instVarAt:index put:anObject
    "catch instvar access - asciivalue may not be changed"

    self error:'Characters may not be modified'
!

newDigitValue
	"Answer 0-9 if the receiver is $0-$9, 10-35 if it is $A-$Z, and < 0 otherwise. 
	This is used to parse literal numbers of radix 2-36."

	| code value |
	code := self asInteger.
	value := code - $0 asInteger.
	value <= 9 ifTrue: [^value].
	value := code - $A asInteger.
	(value >= 0 and: [value < 26])
		ifTrue: [^value + 10].
	^-1
! !

!Character methodsFor:'arithmetic'!

+ aMagnitude
    "Return the Character that is <aMagnitude> higher than the receiver. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue + aMagnitude asInteger \\ 16rFFFF)

    "
     $A + 5
    "

    "Modified: 27.6.1996 / 12:34:51 / cg"
!

- aMagnitude
    "Return the Character that is <aMagnitude> lower than the receiver.  
     Wrap if the resulting value is not a legal Character value. (JS)
     claus: 
        modified to return the difference as integer, if the argument
        is another character. If the argument is a number, a character is
        returned."

    aMagnitude isCharacter ifTrue:[
        ^ self asciiValue - aMagnitude asciiValue
    ].
    ^ Character value:(asciivalue - aMagnitude asInteger \\ 16rFFFF)

    "
     $z - $a  
     $d - 3
    "

    "Modified: 27.6.1996 / 12:35:34 / cg"
!

// aMagnitude
    "Return the Character who's value is the receiver divided by <aMagnitude>. 
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue // aMagnitude asInteger \\ 16rFFFF)
!

\\ aMagnitude
    "Return the Character who's value is the receiver modulo <aMagnitude>.  
     Wrap if the resulting value is not a legal Character value. (JS)"

    ^ Character value:(asciivalue \\ aMagnitude asInteger \\ 16rFFFF)
! !

!Character methodsFor:'binary storage'!

hasSpecialBinaryRepresentation
    "return true, if the receiver has a special binary representation"

    ^ true
!

storeBinaryOn:stream manager:manager
    "store a binary representation of the receiver on stream;
     redefined, since single-byte characters are stored more compact
     with a special type-code followed by the asciiValue."

    (asciivalue < 256) ifTrue:[
	stream nextPut:manager codeForCharacter; nextPut:asciivalue.
    ] ifFalse:[
	stream nextPut:manager codeForTwoByteCharacter.
	stream nextPutShort:asciivalue MSB:true
    ]
! !

!Character methodsFor:'comparing'!

< aCharacter
    "return true, if the arguments asciiValue is greater than mine"

    ^ (asciivalue < aCharacter asInteger)
!

<= aCharacter
    "return true, if the arguments asciiValue is greater or equal to mine"

    ^ (asciivalue <= aCharacter asInteger)
!

= aCharacter
    "return true, if the argument, aCharacter is the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ true].
    aCharacter isCharacter ifFalse:[^ false].
    ^ (asciivalue = aCharacter asciiValue)
!

> aCharacter
    "return true, if the arguments asciiValue is less than mine"

    ^ (asciivalue > aCharacter asInteger)
!

>= aCharacter
    "return true, if the arguments asciiValue is less or equal to mine"

    ^ (asciivalue >= aCharacter asInteger)
!

hash
    "return an integer useful for hashing"

    ^ asciivalue
!

identityHash
    "return an integer useful for hashing on identity"

    asciivalue <= 255 ifTrue:[
	^ 4096 + asciivalue
    ].
    ^ super identityHash
!

sameAs:aCharacter
    "return true, if the argument, aCharacter is the same character,
     ignoring case differences."

    self == aCharacter ifTrue:[^ true].
    ^ self asLowercase = aCharacter asLowercase
!

~= aCharacter
    "return true, if the argument, aCharacter is not the same character
     Redefined to take care of 16bit characters."

    self == aCharacter ifTrue:[^ false].
    aCharacter isCharacter ifFalse:[^ true].
    ^ (asciivalue ~~ aCharacter asciiValue)
! !

!Character methodsFor:'converting'!

asCharacter
    "usually sent to integers, but redefined here to allow integers
     and characters to be used commonly without a need for a test."

    ^ self

    "
     32 asCharacter  
    "
!

asInteger
    "return an Integer with my ascii-value.
     OWST4.2 compatibility (sigh)"

    ^ asciivalue
!

asLowercase
    "return a character with same letter as the receiver,
     but lowercase (the receiver if its lowercase or nonLetter)"

    |code "{Class: SmallInteger }"|

"/ the old code:
"/    self isUppercase ifFalse:[^ self].
"/    ^ Character value:(asciivalue + 32)

    code := asciivalue.

    "/ ISO Latin-1
    ((code >= $A asciiValue) and:[code <= $Z asciiValue]) ifTrue:[
	^ Character value:(code + ($a asciiValue - $A asciiValue))
    ].
    code < 16r00C0 ifTrue:[^ self].
    code < 16r0100 ifTrue:[
	code >= 16r00DF ifTrue:[^ self].
	code == 16r00D7 ifTrue:[^ self].
	^ Character value:(code + 16r20)
    ].

    "/ mhmh - in which encoding is this character.
    "/ here, assume Unicode

    'Character [info]: Unicode support is under construction' infoPrintCR.
    ^ self

    "
     $A asLowercase 
     $1 asLowercase  
    "

    "Modified: 10.1.1997 / 15:35:44 / cg"
!

asString
    "return a string of len 1 with myself as contents"

%{  /* NOCONTEXT */
    char buffer[2];
    OBJ s;
    unsigned val;

    val = __intVal(_characterVal(self));
    if (val <= 0xFF) {
        buffer[0] = (char) val;
        buffer[1] = '\0';
        s = __MKSTRING_L(buffer, 1 COMMA_SND);
        if (s != nil) {
            RETURN (s);
        }
    }
%}.
    asciivalue > 255 ifTrue:[
        ^ (TwoByteString new:1) at:1 put:self; yourself
    ].


    ^ (String new:1) at:1 put:self; yourself.
!

asSymbol
    "return a unique symbol which prints like I print"

    ^ Symbol internCharacter:self
!

asUnicodeString
    "return a unicode string of len 1 with myself as contents"

    ^ (UnicodeString new:1) at:1 put:self; yourself.
!

asUppercase
    "return a character with same letter as the receiver,
     but uppercase (the receiver if its uppercase or nonLetter)"

    |code "{Class: SmallInteger }"|

"/ the old code:
"/    self isLowercase ifFalse:[^ self].
"/    ^ Character value:(asciivalue - 32)

    code := asciivalue.

    "/ ISO Latin-1
    ((code >= $a asciiValue) and:[code <= $z asciiValue]) ifTrue:[
	^ Character value:(code + ($A asciiValue - $a asciiValue))
    ].
    code < 16r00E0 ifTrue:[^ self].
    code < 16r0100 ifTrue:[
	code == 16r00F7 ifTrue:[^ self]. "/ division
"/        code == 16r00FF ifTrue:[^ Character value:16r0178].  "/ y diaresis (no uppercase equivalent in ISO-latin 1
	code == 16r00FF ifTrue:[^ self]. 

	^ Character value:(code - 16r20)
    ].

    "/ mhmh - in which encoding is this character.
    "/ here, assume Unicode

    'Character [info]: Unicode support is under construction' infoPrintCR.
    ^ self

    "
     $A asLowercase 
     $1 asLowercase  
    "

    "Modified: 10.1.1997 / 15:35:51 / cg"
!

decodeAsLiteralArray
    "given a literalEncoding in the receiver,
     create & return the corresponding object.
     The inverse operation to #literalArrayEncoding."

    ^ self

    "Created: / 27.10.1997 / 14:40:16 / cg"
!

digitValue
    "return my digitValue for any base"

    |ascii "{ Class: SmallInteger }" |

    ascii := asciivalue.
    (ascii between:($0 asciiValue) and:($9 asciiValue)) ifTrue:[
	^ ascii - $0 asciiValue
    ].
    (ascii between:($a asciiValue) and:($z asciiValue)) ifTrue:[
	^ ascii + (10 - $a asciiValue)
    ]. 
    (ascii between:($A asciiValue) and:($Z asciiValue)) ifTrue:[
	^ ascii + (10 - $A asciiValue)
    ]. 

"remove error below for X3J20 conformance ... "
    self error:'bad character'.
" "
    ^ -1
!

literalArrayEncoding
    "encode myself as an array literal, from which a copy of the receiver
     can be reconstructed with #decodeAsLiteralArray."

    ^ self

    "Created: / 27.10.1997 / 14:40:37 / cg"
!

to:aMagnitude
    "Return an Interval over the characters from the receiver to <aMagnitude>. 
     Wrap <aMagnitude> if it is not a legal Character value. (JS)"

    ^ Interval from:self to:(aMagnitude \\ 256)
!

utf8Encoded
    "convert a character from ISO8859-1 to its UTF-8 encoding.
     this returns a String"

    |s|

    s := (String new:6) writeStream.
    self utf8EncodedOn:s.
    ^ s contents
!

utf8EncodedOn:aStream
    "append my UTF-8 representation to the argument, aStream.
     Up to 31 bits can be encoded in up to 6 bytes. 
     However, currently, characters are limited to 16 bits."

    |b1 b2 b3 b4 b5 v|

    asciivalue <= 16r7F ifTrue:[
        aStream nextPut:self.
        ^ self.
    ].

    b1 := Character value:((asciivalue bitAnd:16r3F) bitOr:2r10000000).
    v := asciivalue bitShift:-6.
    v <= 16r1F ifTrue:[
        aStream nextPut:(Character value:(v bitOr:2r11000000)).
        aStream nextPut:b1.
        ^ self.
    ].
    b2 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
    v := v bitShift:-6.
    v <= 16r0F ifTrue:[
        aStream nextPut:(Character value:(v bitOr:2r11100000)).
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b3 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
    v := v bitShift:-6.
    v <= 16r07 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:2r11110000)).
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b4 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
    v := v bitShift:-6.
    v <= 16r03 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:2r11111000)).
        aStream nextPut:b4.
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    b5 := Character value:((v bitAnd:16r3F) bitOr:2r10000000).
    v := v bitShift:-6.
    v <= 16r01 ifTrue:[
        aStream nextPut:(Character value:(v bitOr:2r11111100)).
        aStream nextPut:b5.
        aStream nextPut:b4.
        aStream nextPut:b3.
        aStream nextPut:b2.
        aStream nextPut:b1.
        ^ self.
    ].
    "/ cannot happen - we only support up to 16 bit characters
    self error:'ascii value > 31bit in utf8Encode'.
        
    "
      $a utf8Encoded   
      $ü utf8Encoded asByteArray  
      (Character value:16r1fff) utf8Encoded asByteArray      
      (Character value:16rffff) utf8Encoded asByteArray      
      (Character value:16r800) utf8Encoded asByteArray      
    "
! !

!Character methodsFor:'copying'!

copy
    "return a copy of myself
     reimplemented since characters are unique"

     ^ self
!

copyUpToLast: char 

	^self
!

deepCopyUsing:aDictionary
    "return a deep copy of myself
     reimplemented since characters are immutable"

     ^ self
!

shallowCopy
    "return a shallow copy of myself
     reimplemented since characters are immutable"

     ^ self
!

simpleDeepCopy
    "return a deep copy of myself
     reimplemented since characters are immutable"

     ^ self
! !

!Character methodsFor:'encoding'!

encodeOn:anEncoder with:aParameter

    anEncoder encodeCharacter:self with:aParameter


!

rot13
     "Usenet: from `rotate alphabet 13 places'] 
      The simple Caesar-cypher encryption that replaces each English
      letter with the one 13 places forward or back along the alphabet, 
      so that 'The butler did it!!' becomes 'Gur ohgyre qvq vg!!'
      Most Usenet news reading and posting programs include a rot13 feature. 
      It is used to enclose the text in a sealed wrapper that the reader must choose
      to open -- e.g., for posting things that might offend some readers, or spoilers. 
      A major advantage of rot13 over rot(N) for other N is that it
      is self-inverse, so the same code can be used for encoding and decoding."

    ((self >= $a) & (self < $n)) | ((self >= $A) & (self < $N))
        ifTrue: [ ^ Character value:(self asciiValue + 13) ].
    ((self > $m) & (self <= $z)) | ((self > $M) & (self <= $Z))
        ifTrue: [ ^ Character value:(self asciiValue - 13) ].
    ^ self

    "
     $h rot13 
     $h rot13 rot13
     'The butler did it!!' rot13
    "
! !

!Character methodsFor:'national testing'!

isNationalAlphaNumeric
    "return true, if the receiver is a letter or digit in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetterOrDigit ifTrue:[^ true].
    ^ self isNationalLetter

    "Modified: 21.1.1997 / 11:10:54 / cg"
!

isNationalLetter
    "return true, if the receiver is a letter in the
     current language (Language variable)"

    "stupid - should be configurable from a table ...
     ... good thing is, that iso8859 puts all national
	 characters above 16rC0"

    self isLetter ifTrue:[^ true].
    (asciivalue between:16rC0 and:16rD6) ifTrue:[^ true].
    (asciivalue between:16rD8 and:16rF6) ifTrue:[^ true].
    (asciivalue between:16rF8 and:16rFF) ifTrue:[^ true].
    ^ false.

    "Modified: 21.1.1997 / 11:13:02 / cg"
! !

!Character methodsFor:'printing & storing'!

displayString
    "return a string used when the receiver is to be displayed
     in an inspector kind-of-thing"

    ^ (self storeString , ' "16r' , (asciivalue printStringRadix:16) , '"')
!

isLiteral
    "return true, if the receiver can be used as a literal constant in ST syntax
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print myself on stdout.
     This method does NOT (by purpose) use the stream classes and 
     will therefore work even in case of emergency (but only, if Stdout is nil)."

%{  /* NOCONTEXT */

    if (@global(Stdout) == nil) {
        putchar(__intVal(__INST(asciivalue)));
        RETURN(self);
    }
%}.
    super print
!

printOn:aStream
    "print myself on aStream"

    aStream nextPut:self
!

printString
    "return a string to print me"

    ^ self asString
!

reconstructOn:aStream
    aStream nextPut:$$; nextPut:self

    "Created: 6.2.1996 / 09:46:12 / stefan"
    "Modified: 6.2.1996 / 10:57:35 / stefan"
!

storeOn:aStream
    "store myself on aStream"

    |special|

    (asciivalue between:33 and:127) ifFalse:[
	(self == Character space) ifTrue:[
	    special := 'space'
	] ifFalse:[
	    (self == Character cr) ifTrue:[
		special := 'cr'.
	    ] ifFalse:[
		(self == Character tab) ifTrue:[
		    special := 'tab'.
		] ifFalse:[
		    (self == Character esc) ifTrue:[
			special := 'esc'.
		    ]
		]
	    ]
	].
	special notNil ifTrue:[
	    aStream nextPutAll:'(Character '; nextPutAll:special; nextPut:$).
	    ^ self
	].
	aStream nextPutAll:'(Character value:16r'.
	asciivalue printOn:aStream base:16.
	aStream nextPut:$)
    ] ifTrue:[
	aStream nextPut:$$; nextPut:self
    ]

    "Modified: / 23.2.1996 / 23:27:32 / cg"
    "Modified: / 20.1.1998 / 14:10:46 / stefan"
! !

!Character methodsFor:'private-accessing'!

setAsciiValue:anInteger
    "very private - set the ascii value. 
     - use this only for characters with codes > 16rFF.
     DANGER alert: funny things happen, if this is applied to
     one of the fixed-characters 0..255."

    asciivalue := anInteger
! !

!Character methodsFor:'testing'!

isCharacter
    "return true,  if the receiver is some kind of character"

    ^ true
!

isControlCharacter
    "return true if I am a control character (i.e. ascii value < 32)"

%{  /* NOCONTEXT */
#ifdef NON_ASCII       /* i.e. EBCDIC ;-) */
    not yet implemented - fails when compiled
#else
    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if (val < ' ') {
        RETURN ( true );
    }
#endif
    RETURN (false);
%}.
    ^ asciivalue < 16r20.

    "
     (Character value:1) isControlCharacter
     $a isControlCharacter
    "
!

isDigit
    "return true, if I am a digit (i.e. $0 .. $9)"

%{  /*NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((unsigned)(val - '0') <= ('9' - '0')) {
        RETURN ( true );
    }
    RETURN ( false );
%}.
    ^ asciivalue between:$0 asciiValue and:$9 asciiValue
!

isDigitRadix:r
    "return true, if I am a digit of a base r number"

    (asciivalue < $0 asciiValue) ifTrue:[^ false]. 
    (r > 10) ifTrue:[
        (asciivalue <= $9 asciiValue) ifTrue:[
            ^ true
        ].
        ((asciivalue - $a asciiValue) between:0 and:(r - 11)) ifTrue:[
            ^ true
        ].
        ^ (asciivalue - $A asciiValue) between:0 and:(r - 11)
    ].
    (asciivalue - $0 asciiValue) < r ifTrue:[^ true].
    ^ false
!

isEndOfLineCharacter
    "return true if I am a line delimitting character"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((val == '\n')
     || (val == '\r')
     || (val == '\f')) {
        RETURN ( true );
    }
    RETURN (false);
%}.
    ^ asciivalue == 16r0A
      or:[asciivalue == 16r0D
      or:[asciivalue == 16r0C]]

!

isImmediate
    "return true if I am an immediate object
     i.e. I am represented in the pointer itself and
     no real object header/storage is used me.
     For VW compatibility, shared characters (i.e. in the range 0..255) 
     also return true here"

    ^ asciivalue <= 255

!

isLetter
    "return true, if I am a letter 
     - use isNationalLetter, if you are interested in those."

%{  /*NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((unsigned)(val - 'a') <= ('z' - 'a')) {
        RETURN ( true );
    }
    if ((unsigned)(val - 'A') <= ('Z' - 'A')) {
        RETURN ( true );
    }
    RETURN ( false );
%}.
    ^ (asciivalue between:($a asciiValue) and:($z asciiValue))
      or:[(asciivalue between:($A asciiValue) and:($Z asciiValue))]
!

isLetterOrDigit
    "return true, if I am a letter or a digit
     - use isNationalAlphaNumeric, if you are interested in those."

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((unsigned)(val - 'a') <= ('z' - 'a')) {
        RETURN ( true );
    }
    if ((unsigned)(val - 'A') <= ('Z' - 'A')) {
        RETURN ( true );
    }
    if ((unsigned)(val - '0') <= ('9' - '0')) {
        RETURN ( true );
    }
    RETURN ( false );
%}.
    ^ self isLetter or:[self isDigit]
!

isLowercase
    "return true, if I am a lower-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((unsigned)(val - 'a') <= ('z' - 'a')) {
        RETURN ( true );
    }
#ifndef OLD
    /* iso8859 puts national lower case characters at e0 .. ff */
    if ((val >= 0xE0) && (val <= 0xFF)) {
        RETURN(true);
    }
#endif
    RETURN ( false );
%}.
    ^ (asciivalue between:($a asciiValue) and:($z asciiValue))
      or:[asciivalue between:16rE0 and:16rFF]
!

isPrintable
    "return true, if the receiver is a useful printable character
     (see fileBrowsers showFile:-method on how it can be used)"

    (asciivalue between:32 and:127) ifTrue:[^ true].
    asciivalue == 12 ifTrue:[^ true].   "/ FF
    asciivalue == 13 ifTrue:[^ true].   "/ CR
    asciivalue == 9 ifTrue:[^ true].    "/ TAB
    asciivalue == 10 ifTrue:[^ true].   "/ NL

    (asciivalue between:16rA0 and:16rBF) ifTrue:[^ true]. "/ ISO8859
    ^ self isNationalLetter

    "Modified: 7.8.1997 / 17:05:24 / cg"
!

isSeparator
    "return true if I am a space, cr, tab, nl, or newPage"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
    if (val <= ' ')
#endif
        if ((val == ' ')
         || (val == '\n') 
         || (val == '\t')
         || (val == '\r')
         || (val == '\f')) {
            RETURN ( true );
        }
    RETURN (false);
%}.                                  
    ^ (asciivalue == 16r20)
      or:[asciivalue == 16r0D
      or:[asciivalue == 16r0A
      or:[asciivalue == 16r09
      or:[asciivalue == 16r0C]]]]

!

isUppercase
    "return true, if I am an upper-case letter"

%{  /* NOCONTEXT */

    REGISTER int val;

    val = __intVal(__INST(asciivalue));
    if ((unsigned)(val - 'A') <= ('Z' - 'A')) {
        RETURN ( true );
    }
#ifndef OLD
    /* iso8859 puts national upper case characters at c0 .. df */
    if ((val >= 0xC0) && (val <= 0xDF)) {
        RETURN(true);
    }
#endif
    RETURN ( false );
%}.
    ^ (asciivalue between:($A asciiValue) and:($Z asciiValue))
      or:[asciivalue between:16rC0 and:16rDF]
!

isVowel
    "return true, if I am a vowel (lower- or uppercase)"

    "/ I know the code is ugly;
    "/ better code is:
    "/     'aeiou' includes:self asLowercase
    "/ or:
    "/     'aeiouAEIOU' includes:self
    "/
    "/ until I have a smart compiler, I use the shorter (codewise):

    (self == $a) ifTrue:[^ true].
    (self == $e) ifTrue:[^ true].
    (self == $i) ifTrue:[^ true].
    (self == $o) ifTrue:[^ true].
    (self == $u) ifTrue:[^ true].
    (self == $A) ifTrue:[^ true].
    (self == $E) ifTrue:[^ true].
    (self == $I) ifTrue:[^ true].
    (self == $O) ifTrue:[^ true].
    (self == $U) ifTrue:[^ true].
    ^ false
! !

!Character methodsFor:'tracing'!

traceInto:aRequestor level:level from:referrer
    "double dispatch into tracer, passing my type implicitely in the selector"

    ^ aRequestor traceCharacter:self level:level from:referrer

! !

!Character class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/Character.st,v 1.87 2003-06-13 18:52:21 cg Exp $'
! !