CharacterArray.st
author ps
Thu, 30 Dec 1999 12:07:15 +0100
changeset 5156 f7d6a1e1a890
parent 5154 2143a3af5072
child 5221 ebdd9dee7c1f
permissions -rw-r--r--
checkin from browser

"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

ByteArray variableByteSubclass:#CharacterArray
	instanceVariableNames:''
	classVariableNames:'PreviousMatch DecoderTables EncoderTables DecodingFailedSignal
		EncodingFailedSignal'
	poolDictionaries:''
	category:'Collections-Text'
!

!CharacterArray class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    CharacterArray is a superclass for all kinds of Strings (i.e.
    (singleByte-)Strings, TwoByteStrings and whatever comes in the future.

    This class is abstract, meaning that there are no instances of it. 
    All this class does is provide common protocol for concrete subclasses.

    [author:]
        Claus Gittinger

    [see also:]
        String TwoByteString
        StringCollection
"
! !

!CharacterArray class methodsFor:'initialization'!

initialize
    DecodingFailedSignal := Signal new mayProceed:true.
    DecodingFailedSignal notifierString:'decoding failure'.
    DecodingFailedSignal nameClass:self message:#decodingFailedSignal.

    EncodingFailedSignal := QuerySignal new mayProceed:true.
    EncodingFailedSignal notifierString:'encoding failure'.
    EncodingFailedSignal nameClass:self message:#encodingFailedSignal.

    "
     CharacterArray initialize
    "

    "Modified: 3.8.1997 / 18:15:59 / cg"
! !

!CharacterArray class methodsFor:'instance creation'!

basicNew
    "return a new empty string"

    ^ self basicNew:0
!

fromBytes:aByteCollection
    "return an instance of the receiver class,
     taking untranslated bytes from the argument, aByteCollection.
     Only useful, when reading twoByteStrings from external sources."

    |mySize nBytes newString dstIdx|

    nBytes := aByteCollection size.
    mySize := self basicNew bitsPerCharacter.
    mySize == 16 ifTrue:[
        newString := self basicNew:(nBytes // 2).
        dstIdx := 1.
        aByteCollection pairWiseDo:[:hi :lo |
            newString at:dstIdx put:(Character value:(hi bitShift:8)+lo).
            dstIdx := dstIdx + 1
        ].
        ^ newString.
    ].

    ^ (self basicNew:nBytes) replaceFrom:1 with:aByteCollection

    "
     TwoByteString fromBytes:#[16r21 16r21]
    "

    "Modified: 30.6.1997 / 20:08:37 / cg"
!

fromString:aString
    "return a copy of the argument, aString"

    ^ (self basicNew:(aString size)) replaceFrom:1 with:aString

    "TwoByteString fromString:'hello'"
!

fromStringCollection:aCollectionOfStrings
    "return new string formed by concatenating a copy of the argument, aString"

    ^ self fromStringCollection:aCollectionOfStrings separatedBy:''

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this')
    "

    "Created: 20.11.1995 / 15:26:59 / cg"
!

fromStringCollection:aCollectionOfStrings separatedBy:aSeparatorString
    "return new string formed by concatenating a copy of the argument, aString"

    |newString first|

    newString := ''.
    first := true.
    aCollectionOfStrings do:[:s | 
	first ifFalse:[
	    newString := newString , aSeparatorString
	] ifTrue:[
	    first := false
	].
	newString := newString , s
    ].
    ^ newString

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this') separatedBy:' '
    "

    "Created: 20.11.1995 / 15:32:17 / cg"
!

new
    "return a new empty string"

    ^ self basicNew:0
! !

!CharacterArray class methodsFor:'Signal constants'!

decodingFailedSignal
    "return the signal, raised when decoding of a string is not possible
     due to invalid characters contained in the source.
     This may happen for example, if a non EUC coded 8-bit string
     is attempted to be decoded into a JIS string."

    ^ DecodingFailedSignal

    "Created: 28.6.1997 / 20:09:55 / cg"
    "Modified: 3.8.1997 / 18:16:47 / cg"
!

encodingFailedSignal
    "return the (query-) signal, raised when encoding of a string is not possible
     due to invalid characters contained in the source."

    ^ EncodingFailedSignal

    "Modified: 28.6.1997 / 20:09:35 / cg"
    "Created: 3.8.1997 / 18:16:40 / cg"
! !

!CharacterArray class methodsFor:'cleanup'!

lowSpaceCleanup
    "cleanup in low-memory situations"

    DecoderTables := EncoderTables := nil

    "
     CharacterArray lowSpaceCleanup
    "

    "Created: 22.2.1996 / 16:30:30 / cg"
    "Modified: 22.2.1996 / 17:58:05 / cg"
! !

!CharacterArray class methodsFor:'code tables'!

classForEncoding:encodingSymbol
    "return a class capable of representing text encoded
     by encodingSymbol"

    encodingSymbol == #jis ifTrue:[^ JISEncodedString].
    encodingSymbol == #big5 ifTrue:[^ BIG5EncodedString].
    encodingSymbol == #gb ifTrue:[^ GBEncodedString].
    encodingSymbol == #ksc ifTrue:[^ KSCEncodedString].
    encodingSymbol == #unicode ifTrue:[^ UnicodeString].

    "/
    "/ assume its an 8-bit code
    "/
    ^ String

    "Modified: 30.6.1997 / 19:48:05 / cg"
!

decoderTableFor:encoding
    "return a table to decode from an alien encoding into
     the internal (iso8859, ansi) format.
     This is an experimental interface - unfinished"

    |table unrepresentableCharacterCode x80Table|

    DecoderTables notNil ifTrue:[
        table := DecoderTables at:encoding ifAbsent:nil.
        table notNil ifTrue:[
            table == #identity ifTrue:[^ nil].
            ^ table
        ].
    ] ifFalse:[
        DecoderTables := IdentityDictionary new
    ].

    unrepresentableCharacterCode := 16rBF.

    "/
    "/ setup as identityTranslation
    "/
    table := (0 to:255) asByteArray.

    "/
    "/ hp -> iso8859
    "/
    "/ hp uses a funny encoding of the A1 .. FF characters
    "/
    "/   
    encoding == #hp ifTrue:[
        unrepresentableCharacterCode := 16rA0.

        x80Table :=
            #(
               16r0080 16r0081 16r0082 16r0083 16r0084 16r0085 16r0086 16r0087  "/ 80
               16r0088 16r0089 16r008a 16r008b 16r008c 16r008d 16r008e 16r008f

               16r0090 16r0091 16r0092 16r0093 16r0094 16r0095 16r0096 16r0097  "/ 90
               16r0098 16r0099 16r009a 16r009b 16r009c 16r009d 16r009e 16r009f

               16r00a0 16r00c0 16r00c2 16r00c8 16r00c9 16r00cb 16r00ce 16r00cf  "/ a0
               16r00b4 16rFFFF 16rFFFF 16r00a8 16rFFFF 16r00d9 16r00da 16r00a3

               16rFFFF 16r00dd 16r00fd 16r00b0 16r00c7 16r00e7 16r00d1 16r00f1  "/ b0
               16r00a1 16r00bf 16r00a4 16r00a3 16r00a5 16r00a7 16rFFFF 16rFFFF

               16r00e2 16r00ea 16r00f4 16r00fb 16r00e1 16r00e9 16r00f3 16r00fa  "/ c0
               16r00e0 16r00e8 16r00f2 16r00f9 16r00e4 16r00eb 16r00f6 16r00fc

               16r00c5 16r00ee 16r00d8 16r00c6 16r00e5 16r00ed 16r00f8 16r00e6  "/ d0
               16r00c4 16r00ec 16r00d6 16r00dc 16r00c9 16r00ef 16r00df 16r00d4

               16r00c1 16r00c3 16r00e3 16r00d0 16r00f0 16r00cd 16r00cc 16r00d3  "/ e0
"/                                        S ?       s ?             Y ?
               16r00d2 16r00d5 16r00f5 16rFFFF 16rFFFF 16r00da 16rFFFF 16r00ff 

               16r00de 16r00fe 16r00b7 16r00b5 16r00b6 16r00be 16r00ad 16r00bc  "/ f0
               16r00bd 16r00aa 16r00ba 16r00ab 16rFFFF 16r00bb 16r00b1 16rFFFF
            ).
    ].

    "/
    "/ mac -> iso8859
    "/
    "/ You will loose the following characters:
    "/ - all of them will be replaced by 8F
    "/
    "/  A0 (dagger)
    "/  A5 (dot-dot)
    "/  AA (tm - trademark)
    "/  AD (not equal)
    "/  B0 (infinity)
    "/  B3 (less-double-equal <=)
    "/  B4 (greater-equal >-)
    "/  B6 (math lowercase delta)
    "/  B7 (math sum)
    "/  B8 (math uppercase pi)
    "/  B9 (math lowercase pi)
    "/  BA (math integral)
    "/  BD (math omega)
    "/  C3 (math union)
    "/  C4 (ext latin f)
    "/  C5 (math almost equal)
    "/  C6 (math uppercase delta)
    "/  C9 (dot-dot-dot)
    "/  CE (OE ligature)
    "/  CF (oe ligature)
    "/  D1 (hyphen)
    "/  D2 (opening top dquote)
    "/  D3 (closing top dquote)
    "/  D4 (opening top quote)
    "/  D5 (closing top quote)
    "/  D7 (geometric: )
    "/  D9 (uppercase Y diacrit )
    "/  DA (slash2 )
    "/  DC (single oldStyle opening quote (<) )
    "/  DD (single oldStyle closing quote (>) )
    "/  DE (? )
    "/  DF (? )
    "/  E0 (double dagger )
    "/  E2 (opening bottom quote )
    "/  E3 (opening bottom dquote )
    "/  F0 (? )
    "/  F5 (latin l )
    "/  F6 (diacrit circumflex)
    "/  F7 (diacrit tilde)
    "/  F8 (diacrit top line)
    "/  F9 (diacrit inverse circumflex)
    "/  FA (diacrit dot)
    "/  FB (diacrit ring)
    "/  FC (diacrit cedille left)
    "/  FD (diacrit dquote)
    "/  FE (diacrit cedille right)
    "/  FF (diacrit circumflex2 ?)
    "/   
    encoding == #mac ifTrue:[
        x80Table :=
            #( 16r00c4 16r00c5 16r00c7 16r00c9 16r00d1 16r00d6 16r00dc 16r00e1  "/ 80
               16r00e0 16r00e2 16r00e4 16r00e3 16r00e5 16r00e7 16r00e9 16r00e8

               16r00ea 16r00eb 16r00ed 16r00ec 16r00ee 16r00ef 16r00f1 16r00f3  "/ 90
               16r00f2 16r00f4 16r00f6 16r00f5 16r00fa 16r00f9 16r00fb 16r00fc

               16rFFFF 16r00b0 16r00a2 16r00a3 16r00a7 16rFFFF 16r00b6 16r00df  "/ a0
               16r00ae 16r00a9 16rFFFF 16r00b4 16r00a8 16rFFFF 16r00c6 16r00d8

               16rFFFF 16r00b1 16rFFFF 16rFFFF 16r00a5 16r00b5 16rFFFF 16rFFFF  "/ b0
               16rFFFF 16rFFFF 16rFFFF 16r00aa 16r00ba 16rFFFF 16r00e6 16r00f8

               16r00bf 16r00a1 16r00ac 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00ab  "/ c0
               16r00bb 16rFFFF 16r00a0 16r00c1 16r00c3 16r00d5 16rFFFF 16rFFFF

               16r00ad 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF  "/ d0
               16rFFFF 16rFFFF 16rFFFF 16r00a4 16rFFFF 16rFFFF 16rFFFF 16rFFFF

               16rFFFF 16r00b7 16rFFFF 16rFFFF 16rFFFF 16r00c2 16r00ca 16r00c1  "/ e0
               16r00cb 16r00c8 16r00cd 16r00ce 16r00cf 16r00cc 16r00d3 16r00d4

               16rFFFF 16r00d2 16r00da 16r00db 16r00d9 16rFFFF 16rFFFF 16rFFFF  "/ f0
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF
            ).
    ].

    "/
    "/ next -> iso8859
    "/
    "/ You will loose the following characters:
    "/ - all of them will be replaced by 8F
    "/
    "/  A4 (diacrit /)
    "/  A9 (diacrit quote)
    "/  AA (top opening dquote)
    "/  AC (single oldStyle opening quote (<))
    "/  AD (single oldStyle closing quote (>))
    "/  AE (?)
    "/  AF (?)
    "/  B2 (dagger)
    "/  B3 (double dagger)
    "/  B7 (enter dot)
    "/  B8 (opening bottom quote)
    "/  B9 (opening bottom dquote)
    "/  BA (closing top dquote)
    "/  BC (dot-dot-dot)
    "/  BD (per-mille)
    "/  C0 (subscript 1)
    "/  C1 (accent grave)
    "/  C2 (accent degu)
    "/  C3 (accent circonflex)
    "/  C4 (accent tilde)
    "/  C5 (accent line)
    "/  C6 (accent circonflex reverse)
    "/  C7 (accent dot)
    "/  C8 (accent dot-dot)
    "/  D0 (hline)
    "/  E8 (engl. pound upper case L)
    "/  EA (OE ligature)
    "/  F5 (latin l)
    "/  F8 (engl. pound lower case l)
    "/  FE (?)
    "/  FF (?)
    "/
    encoding == #next ifTrue:[
        unrepresentableCharacterCode := 16rBF.

        x80Table :=
            #( 16r0080 16r00c0 16r00c1 16r00c2 16r00c3 16r00c4 16r00c5 16r00c7  "/ 80
               16r00c8 16r00c9 16r00ca 16r00cb 16r00cc 16r00cd 16r00ce 16r00cf

               16r00d0 16r00d1 16r00d2 16r00d3 16r00d4 16r00d5 16r00d6 16r00d9  "/ 90
               16r00da 16r00db 16r00dc 16r00dd 16r00de 16r00b5 16r00d7 16r00f7

               16r00a9 16r00a1 16r00a2 16r00a3 16rFFFF 16r00a5 16rFFFF 16r00a7  "/ a0
               16r00a4 16rFFFF 16rFFFF 16r00ab 16rFFFF 16rFFFF 16rFFFF 16rFFFF

               16r00ae 16r00ad 16rFFFF 16rFFFF 16r00b7 16r00a6 16r00b6 16rFFFF  "/ b0
               16rFFFF 16rFFFF 16rFFFF 16r00bb 16rFFFF 16rFFFF 16r00ac 16r00bf

               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF  "/ c0
               16rFFFF 16r00b2 16r00b0 16r00b8 16r00b3 16rFFFF 16rFFFF 16rFFFF

               16rFFFF 16r00b1 16r00bc 16r00bd 16r00be 16r00e0 16r00e1 16r00e2  "/ d0
               16r00e3 16r00e4 16r00e5 16r00e7 16r00e8 16r00e9 16r00ea 16r00eb

               16r00ec 16r00c6 16r00ed 16r00aa 16r00ee 16r00ef 16r00f0 16r00f1  "/ e0
               16rFFFF 16r00d8 16rFFFF 16r00ba 16r00f2 16r00f3 16r00f4 16r00f5

               16r00f6 16r00e6 16r00f9 16r00fa 16r00fb 16rFFFF 16r00fc 16r00fd  "/ f0
               16rFFFF 16r00f8 16rFFFF 16r00df 16r00fe 16r00ff 16rFFFF 16rFFFF
            ).
    ].

    "/
    "/ msdos (codePage 437 [=US]) -> iso8859
    "/
    "/
    "/ You will loose the following characters:
    "/ - all of them will be replaced by 8F
    "/
    "/  9E      (pesetas)
    "/  9F      (latin f)
    "/  A9      (technical not)
    "/  B0 - BF (block graphic)
    "/  C0 - CF (block graphic)
    "/  D0 - DF (block graphic)
    "/  E0      (greek alpha)
    "/  E2 - E5 (greek)
    "/  E7 - EC (greek & math)
    "/  EE - EF (greek & math)
    "/  F0      (math)
    "/  F2 - F5 (math & technical)
    "/  F7      (math)
    "/  F9      (center dot)
    "/  FB      (math sqrt)
    "/  FC      (super n)
    "/  FE      (block)
    "/
    encoding == #msdos437 ifTrue:[
        unrepresentableCharacterCode := 16rBF.

        x80Table :=
            #(
               "/ Ccedil  uuml  eacute  acirc   auml    agrave  aring   ccedil
               16r00c7 16r00fc 16r00e9 16r00e2 16r00e4 16r00e0 16r00e5 16r00e7  "/ 80
               "/ ecirc   euml  egrave  iuml    icirc   igrave  Auml    Aring
               16r00ea 16r00eb 16r00e8 16r00ef 16r00ee 16r00ec 16r00c4 16r00c5
               "/ Eacute  ae     AE     ocirc   ouml    ograve  ucirc   ugrave
               16r00c9 16r00e6 16r00c6 16r00f4 16r00f6 16r00f2 16r00fb 16r00f9  "/ 90
               "/ yuml    Ouml   Uuml   cent    pound   Yen     psetas  latin-f
               16r00ff 16r00d6 16r00dc 16r00a2 16r00a3 16r00a5 16rFFFF 16rFFFF

               "/ aacute iacute oacute  uacute  ntilde  Ntilde  asup    0sup 
               16r00e1 16r00ed 16r00f3 16r00fa 16r00f1 16r00d1 16r00aa 16r00b0  "/ a0
               "/ iquest        lognot  1/2     1/4     iexcla  <<      >>
               16r00bf 16rFFFF 16r00ac 16r00bd 16r00bc 16r00a1 16r00ab 16r00bb

               "/ block graphics
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF  "/ b0
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF

               "/ block graphics
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF  "/ c0
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF

               "/ block graphics
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF  "/ d0
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF

               "/ alpha beta   ...                              mu
               16rFFFF 16r00df 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00b5 16rFFFF  "/ e0
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f8 16rFFFF

               "/       plusmin  gr-eq  lt-eq   integr  integr  div
               16rFFFF 16r00b1 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00f7 16rFFFF  "/ f0
               "/ 0sup  dot      dot    sqr             sup2            space 
               16r00b0 16rFFFF 16r00b7 16rFFFF 16rFFFF 16r00b2 16rFFFF 16r00a0
            ).
    ].


    "/
    "/ msdos (codePage 850 [=latin1]) -> iso8859
    "/

    (encoding == #msdos850 
    or:[encoding == #msdos]) ifTrue:[
        unrepresentableCharacterCode := 16rBF.
        x80Table :=
            #(
               "/ Ccedil  uuml  eacute  acirc   auml    agrave  aring   ccedil
               16r00c7 16r00fc 16r00e9 16r00e2 16r00e4 16r00e0 16r00e5 16r00e7  "/ 80

               "/ ecirc   euml  egrave  iuml    icirc   igrave  Auml    Aring
               16r00ea 16r00eb 16r00e8 16r00ef 16r00ee 16r00ec 16r00c4 16r00c5

               "/ Eacute  ae     AE     ocirc   ouml    ograve  ucirc   ugrave
               16r00c9 16r00e6 16r00c6 16r00f4 16r00f6 16r00f2 16r00fb 16r00f9  "/ 90
               "/ yuml    Ouml   Uuml   oslash    pound Oslash  x      latin-f
               16r00ff 16r00d6 16r00dc 16r00f8 16r00a3 16r00d8 16rFFFF 16rFFFF

               "/ aacute iacute oacute  uacute  ntilde  Ntilde  asup    0sup 
               16r00e1 16r00ed 16r00f3 16r00fa 16r00f1 16r00d1 16r00aa 16r00da  "/ a0
               "/ iquest reg    lognot  1/2     1/4     iexcla  <<      >>
               16r00bf 16r00ae 16r00ac 16r00bd 16r00bc 16r00a1 16r00ab 16r00bb

               "/                                       Aacute  Acirc   Agrave
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00c1 16r00c2 16r00c0  "/ b0
               "/ cpyr                                  cent    Yen
               16r00a9 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00a2 16r00a5 16rFFFF

               "/                                               atilde  Atilde
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r00e3 16r00c3  "/ c0
               "/                                               
               16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16rFFFF 


               "/               Ecirc   Euml    Egrave          Iacute  Icirc
               16rFFFF 16rFFFF 16r00ca 16r00cb 16r00c8 16rFFFF 16r00dc 16r00ce  "/ d0
               "/ Iuml                                          Igrave
               16r00cf 16rFFFF 16rFFFF 16rFFFF 16rFFFF 16r007c 16r00cc 16rFFFF  

               "/ Oacute  sz    Ocirc   Ograve  otilde  Otilde   mu
               16r00d3 16r00df 16r00d4 16r00d2 16rFFFF 16r00f5 16r00b5 16rFFFF  "/ e0
               "/       Uacute  Ucirc   Ugrav    yacute  Yacute             '
               16rFFFF 16r00da 16r00db 16r00d9 16r00fd 16r00dd 16rFFFF 16r0027

               "/       plusmin  =       3/4    para   sect      div            "/ f0
               16rFFFF 16r00b1 16r003d 16r00be 16r00b6 16r00a7 16r00f7 16rFFFF

               "/ sup0                          sup3   sup2
               16r00b0 16rFFFF 16rFFFF 16rFFFF 16r00b3 16r00b2 16rFFFF 16r00a0
            ).
    ].
    "/ more encodings needed here ....


    "/
    "/ x80Table should contain the translation for
    "/ 0x80..0xFF characters
    "/
    x80Table isNil ifTrue:[
        table := #identity.
    ] ifFalse:[
        x80Table keysAndValuesDo:[:idx :repl |
            |ch|

            repl == 16rFFFF ifTrue:[
                ch := unrepresentableCharacterCode
            ] ifFalse:[
                ch := repl
            ].
            table at:(16r80 + idx) put:ch
        ].
    ].

    DecoderTables at:encoding put:table.
    ^ table

    "
     EncoderTables := nil.
     DecoderTables := nil
    "

    "Created: 20.10.1995 / 23:04:43 / cg"
    "Modified: 1.7.1997 / 17:24:49 / cg"
!

defaultRomanCharacterValue
    "return the characterValue for unmappable roman characters
     (only applies to jis, gb, ksc etc.)"

    |romans|

    romans := self romanTable.
    romans isNil ifTrue:[^ 16r20].
    ^ romans at:1.              "/ space characters xlation

    "Created: 30.6.1997 / 18:13:24 / cg"
    "Modified: 30.6.1997 / 18:19:26 / cg"
!

encoderTableFor:encoding
    "return a table to encode from the internal (iso8859, ansi) format
     into an alien encoding.
     This is an experimental interface - unfinished"

    |table decoderTable unrepresentableCharacterCode|

    EncoderTables notNil ifTrue:[
        table := EncoderTables at:encoding ifAbsent:nil.
        table notNil ifTrue:[
            table == #identity ifTrue:[^ nil].
            ^ table
        ].
    ] ifFalse:[
        EncoderTables := IdentityDictionary new
    ].

    "/
    "/ iso8859 -> hp
    "/
    encoding == #hp ifTrue:[
        decoderTable := self decoderTableFor:encoding.
        unrepresentableCharacterCode := 16rFF.
    ].

    "/
    "/ iso8859 -> mac
    "/
    encoding == #mac ifTrue:[
        decoderTable := self decoderTableFor:encoding.
        unrepresentableCharacterCode := 16rFF.
    ].

    "/
    "/ iso8859 -> msdos
    "/
    encoding == #msdos ifTrue:[
        decoderTable := self decoderTableFor:encoding.
        unrepresentableCharacterCode := 16rFF.
    ].

    "/ more encodings needed here ....

    decoderTable notNil ifTrue:[
        table := Array new:256 withAll:unrepresentableCharacterCode.

        0 to:16rFF do:[:code |
            |isoCode destIdex|

            isoCode := decoderTable at:(code+1).
            table at:isoCode+1 put:code.
        ]
    ].

    table isNil ifTrue:[
        "/
        "/ cannot put a nil into an Id-Dict;
        "/ use special #identity
        "/
        table := #identity.
    ].

    EncoderTables at:encoding put:table.
    ^ table

    "
     EncoderTables := nil.
     DecoderTables := nil.
     CharacterArray decoderTableFor:#mac
     CharacterArray encoderTableFor:#mac
    "

    "Created: 22.2.1996 / 16:17:58 / cg"
    "Modified: 1.7.1997 / 17:26:28 / cg"
!

romanTable
    ^ nil

    "Created: 30.6.1997 / 15:13:52 / cg"
!

supportedEncodings
    "return an array containing symbolic names of supported encodings. 
     These are internally visible supported ones only"

    ^ self supportedExternalEncodings at:2.

    "Modified: 30.6.1997 / 14:38:25 / cg"
!

supportedExternalEncodings
    "return an array of two arrays containing the names of supported
     encodings which are supported for external resources (i.e. files).
     The first array contains user-readable strings (descriptions),
     the second contains the internally used symbolic names.
     More than one external name may be mapped onto the same symbolic."

    ^ #( 
         (
          'iso8859      (ansi)'  
          'iso8859-1    (latin1)'  
          'iso8859-2    (latin2)'  
          'iso8859-3    (latin3)'  
          'iso8859-4    (latin4)'  
          'iso8859-5    (cyrillic)'  
          'iso8859-6    (arabic)'  
          'iso8859-7    (greek)'  
          'iso8859-8    (hebrew)'  
          'msdos US     (codepage 437)'  
          'msdos Latin1 (codepage 850)'  
          'macintosh    (8 bit)' 
          'next         (8 bit)' 
          'hp           (8 bit)' 
          nil
          'EUC          (extended unix code japanese)' 
          'JIS7         (jis 7bit escape codes japanese)'
          'ISO-2022-JP  (same as jis 7bit)'
          'SJIS         (shift jis 8bit codes japanese)'
          nil
          'GB           (mainland china)'
          'BIG5         (taiwan)'
"/          'KSC          (korean)'
         )
         (
          #'iso8859'
          #'iso8859-1'    "/ latin1
          #'iso8859-2'    "/ latin2
          #'iso8859-3'    "/ latin3
          #'iso8859-4'    "/ latin4
          #'iso8859-5'    "/ cyrillic
          #'iso8859-6'    "/ arabic
          #'iso8859-7'    "/ greek
          #'iso8859-8'    "/ hebrew
          #'msdos437'                   
          #'msdos850'                   
          #'mac'         
          #'next'   
          #'hp'   
          nil
          #'euc'   
          #'jis7'   
          #'iso-2022-jp'   
          #'sjis'   
          nil
          #'gb'   
          #'big5'
"/          #'ksc'           "/ korean
         )
       )

    "Created: 22.4.1996 / 14:39:39 / cg"
    "Modified: 16.7.1997 / 13:47:16 / cg"
! !

!CharacterArray class methodsFor:'encoding / decoding'!

decodeFromBIG5:aString
    "return a new string containing the characters from aString,
     which is interpreted as a BIG5 encoded singleByte string.

     The result is a Big5EncodedString (you need a BIG5 font to display that ...)."

    |newString|

    newString := BIG5EncodedString new:aString size.
    ^ self decodeFromBIG5_or_GB:aString into:newString.

    "Modified: 17.4.1996 / 18:58:29 / cg"
!

decodeFromBIG5_or_GB:aString into:a16BitString
    "return a new string containing the characters from aString,
     which is interpreted as a BIG5 encoded singleByte string.
     The result is a Big5EncodedString (you need a BIG5 font to display that ...).

     This is a first-class candidate for a primitive"

    |sz     "{ Class: SmallInteger }"
     dstIdx "{ Class: SmallInteger }"
     srcIdx "{ Class: SmallInteger }"
     b1     "{ Class: SmallInteger }"
     b2     "{ Class: SmallInteger }"
     val    "{ Class: SmallInteger }"
     c|

    sz := aString size.
    sz ~~ 0 ifTrue:[
        dstIdx := 1.
        srcIdx := 1.

        [true] whileTrue:[
            c := aString at:srcIdx.
            "/
            "/ characters below 16rA1 are left untranslated
            "/ (romans).
            "/ Translation into roman-row is done at display time.
            "/
            b1 := c asciiValue.
            b1 >= 16rA1 ifTrue:[
                srcIdx := srcIdx + 1.
                srcIdx <= sz ifTrue:[
                    b2 := (aString at:srcIdx) asciiValue.
                    val := (b1 bitShift:8) bitOr:b2.
                    c := Character value:val.
                ]
            ].

            a16BitString at:dstIdx put:c.
            dstIdx := dstIdx + 1.
            srcIdx := srcIdx + 1.

            srcIdx > sz ifTrue:[
                ^ a16BitString copyFrom:1 to:dstIdx-1.
            ]
        ]
    ].
    ^ a16BitString

    "Created: 17.4.1996 / 16:55:54 / cg"
    "Modified: 4.7.1997 / 11:01:38 / cg"
!

decodeFromEUC:aString
    "return a new string containing the characters from aString,
     which is interpreted as an EUC encoded singleByte string.

     There are various JIS encodings around (New-JIS, Old-JIS and NEC-JIS);
     this one only understands New-JIS.
     The result is a JISEncodedString (you need a JIS font to display that ...).

     This is a first-class candidate for a primitive"

    |newString 
     sz     "{ Class: SmallInteger }"
     dstIdx "{ Class: SmallInteger }"
     srcIdx "{ Class: SmallInteger }"
     b1     "{ Class: SmallInteger }"
     b2     "{ Class: SmallInteger }"
     val    "{ Class: SmallInteger }"
     c c2|

    sz := aString size.
    newString := JISEncodedString new:sz.
    sz ~~ 0 ifTrue:[
        dstIdx := 1.
        srcIdx := 1.

%{
        if (__isString(aString)
         && (__Class(newString) == @global(JISEncodedString))) {
            int _dstIdx = 1, _srcIdx = 1;
            int _sz = __intVal(sz);
            unsigned char *_cp = __stringVal(aString);
            unsigned char _c1;
            unsigned short *_jcp = (unsigned short *)__stringVal(newString);

            while (_srcIdx <= _sz) {
                _c1 = _cp[_srcIdx-1];
                if (_c1 < 161) {
                    _jcp[_dstIdx-1] = _c1;
                } else {
                    _srcIdx++;
                    if (_srcIdx <= _sz) {
                        unsigned char _c2;
                        int _val;
                        int _b1, _b2;

                        _b1 = _c1 - 128;
                        _c2 = _cp[_srcIdx-1];
                        _b2 = _c2 - 128;
                        _val = (_b1<<8) + _b2;
                        if (_val < 0) {
                            /* decoder errors are handled in smalltalk */
                            _srcIdx--;
                            goto getOutOfHere;
                        }
                        _jcp[_dstIdx-1] = _val;
                    } else {
                        _jcp[_dstIdx-1] = _c1;
                    }
                }
                _dstIdx++;
                _srcIdx++;
            }
    getOutOfHere:
            srcIdx = __MKSMALLINT(_srcIdx);
            dstIdx = __MKSMALLINT(_dstIdx);
        }
%}.

        [srcIdx <= sz] whileTrue:[
            c := aString at:srcIdx.
            b1 := c asciiValue.
            b1 < 161 ifTrue:[
                "/ characters below 16rA1 are left untranslated
                "/ (control character or roman).
                newString at:dstIdx put:c.
            ] ifFalse:[
                srcIdx := srcIdx + 1.
                srcIdx <= sz ifTrue:[    
                    b1 := b1 - 128.
                    b2 := (c2 := aString at:srcIdx) asciiValue.
                    b2 := b2 - 128.
                    val := (b1 bitShift:8) bitOr:b2.
                    val <= 0 ifTrue:[
                        DecodingFailedSignal 
                            raiseWith:aString
                            errorString:'EUC decoding failed (not EUC encoded ?)'.
                        newString at:dstIdx put:c.
                        dstIdx := dstIdx + 1.
                        newString at:dstIdx put:c2.
                    ] ifFalse:[
                        newString at:dstIdx put:(Character value:val).
                    ].
                ] ifFalse:[
                    newString at:dstIdx put:c.
                ].
            ].
            dstIdx := dstIdx + 1.
            srcIdx := srcIdx + 1.
        ].

        (dstIdx-1) ~~ sz ifTrue:[
            newString := newString copyFrom:1 to:dstIdx-1.
        ].
    ].
    ^ newString

    "simple:

         'hello' decodeFrom:#euc 
    "

    "Created: 17.4.1996 / 16:10:22 / cg"
    "Modified: 4.7.1997 / 11:06:05 / cg"
!

decodeFromGB:aString
    "return a new string containing the characters from aString,
     which is interpreted as a GB encoded singleByte string.

     The result is a Big5EncodedString (you need a GB font to display that ...)."

    |newString|

    newString := GBEncodedString new:aString size.
    ^ self decodeFromBIG5_or_GB:aString into:newString

    "Created: 17.4.1996 / 16:56:33 / cg"
!

decodeFromJIS7:aString
    "return a new string containing the aStrings characters,
     which are interpreted as a JIS7 or ISO2022-JP encoded singleByte string.
     There are various JIS encodings around (New-JIS, Old-JIS, NEC-JIS and ISO2022);
     this one understands New-JIS, ISO2022 and treats Old-JIS just the same.
     The result is a JISEncodedString (you need a JIS font to display that ...).

     This is a first-class candidate for a primitive"

    |newString 
     sz         "{ Class: SmallInteger }"
     dstIdx     "{ Class: SmallInteger }"
     start      "{ Class: SmallInteger }"
     stop       "{ Class: SmallInteger }"
     n1 n2 n3  
     b1         "{ Class: SmallInteger }"
     b2         "{ Class: SmallInteger }"
     val        "{ Class: SmallInteger }"
     singleBytes katakana c|

    sz := aString size.
    newString := JISEncodedString new:sz.
    sz ~~ 0 ifTrue:[
        dstIdx := 1.
        start := 1.
        singleBytes := true.
        katakana := false.

        [true] whileTrue:[
            "/
            "/ scan for next escape"
            "/
            stop := aString indexOf:(Character esc) startingAt:start.
            stop == 0 ifTrue:[
                stop := sz + 1.
            ] ifFalse:[
                (stop + 2) > sz ifTrue:[
                    stop := sz + 1.
                ]
            ].
            singleBytes ifTrue:[
                start to:(stop - 1) do:[:i |
                    c := aString at:i.
                    newString at:dstIdx put:c.
                    dstIdx := dstIdx + 1.
                ].
            ] ifFalse:[
                start to:(stop - 2) by:2 do:[:i |
                    b1 := (aString at:i) asciiValue.
                    b2 := (aString at:i+1) asciiValue.
                    val := (b1 bitShift:8) bitOr:b2.
                    newString at:dstIdx put:(Character value:val).
                    dstIdx := dstIdx + 1.
                ]
            ].

            stop > sz ifTrue:[
                ^ newString copyFrom:1 to:dstIdx - 1.
            ].
            start := stop.

            "/
            "/ found an escape (at start) 
            "/ - check for KI (<ESC> '$' 'B') or OLD-JIS-KI (<ESC> '$' '@')
            "/ and KO(ASCII) (<ESC> '(' 'B') or KO(ROMAN) (<ESC> '(' 'J')
            "/
            n1 := aString at:start.
            n2 := aString at:(start + 1).
            n3 := aString at:(start + 2).
            katakana := false.

            (n2 == $$ and:[n3 == $B ]) ifTrue:[
                singleBytes := false.
            ] ifFalse:[
                (n2 == $$ and:[n3 == $@ ]) ifTrue:[
                    singleBytes := false.
                ] ifFalse:[
                    (n2 == $( and:[n3 == $B ]) ifTrue:[
                        singleBytes := true.
                    ] ifFalse:[
                        (n2 == $( and:[n3 == $J ]) ifTrue:[
                            singleBytes := true.
                        ] ifFalse:[
                            (n2 == $( and:[n3 == $I ]) ifTrue:[
                                singleBytes := true.
                                katakana := true.
                            ] ifFalse:[
                                singleBytes ifTrue:[
                                    newString at:dstIdx put:n1.
                                    newString at:(dstIdx + 1) put:n2.
                                    newString at:(dstIdx + 2) put:n3.
                                    dstIdx := dstIdx + 3.
                                ] ifFalse:[
                                    DecodingFailedSignal 
                                            raiseWith:aString
                                            errorString:'JIS7 decoding failed (not JIS7 encoded ?)'.
                                    newString at:dstIdx put:n1.
                                    newString at:(dstIdx + 1) put:n2.
                                    newString at:(dstIdx + 2) put:n3.
                                    dstIdx := dstIdx + 3.
                                ]
                            ]
                        ]
                    ]
                ]
            ].
            start := start + 3.
            start > sz ifTrue:[
                ^ newString copyFrom:1 to:dstIdx-1.
            ]
        ]
    ].
    ^ newString

    "simple:

         'hello' decodeFromJIS7

     ending with a crippled escape:

         |s|
         s := 'hello' copyWith:Character esc.
         s decodeFromJIS7

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$A.
         s decodeFromJIS7

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s decodeFromJIS7

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$A.
         s decodeFromJIS7 

     ending with a KANJI-in,  but no more chars:

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$B.
         s decodeFromJIS7

     ending with a KANJI-in, followed by $3 (KO):

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$B.
         s := s , '$3'.
         s decodeFromJIS7

     ending with a KANJI-in, followed by $3$l$OF| (KO RE HA NI):

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$B.
         s := s , '$3$l$OF|'.
         s decodeFromJIS7

     a KO in between:

         |s|
         s := 'hello' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$B.
         s := s , '$3'.
         s := s copyWith:Character esc.
         s := s copyWith:$(.
         s := s copyWith:$B.
         s := s , 'hello'.
         s decodeFromJIS7

     I dont know what that means ;-):

         |s t l|
         s := 'kterm ' copyWith:Character esc.
         s := s copyWith:$$.
         s := s copyWith:$B.
         s := s , '$N4A;zC<Kv%(%_%e%l!!<%?'.
         s := s copyWith:Character esc.
         s := s copyWith:$(.
         s := s copyWith:$B.
         s := s , ' kterm'.
         t := s decodeFromJIS7.
         l := Label new.
         l label:t.
         l font:(Font family:'k14' face:nil style:nil size:nil).
         l font:(Font family:'gothic' size:17).
         l font:(Font family:'mincho' size:23).
         l realize
    "

    "Created: 17.4.1996 / 16:11:57 / cg"
    "Modified: 16.7.1997 / 12:50:55 / cg"
!

decodeFromSJIS:aString
    "return a new string containing the aStrings characters,
     which are interpreted as a Shift-JIS encoded singleByte string.
     The result is a JISEncodedString (you need a JIS font to display that ...).

     This is a first-class candidate for a primitive"

    |newString char1 char2
     sz         "{ Class: SmallInteger }"
     dstIdx     "{ Class: SmallInteger }"
     srcIdx     "{ Class: SmallInteger }"
     b1         "{ Class: SmallInteger }"
     b2         "{ Class: SmallInteger }"
     val        "{ Class: SmallInteger }"
    |

    sz := aString size.
    newString := JISEncodedString new:sz.
    sz ~~ 0 ifTrue:[
        dstIdx := 1.
        srcIdx := 1.
%{
        if (1 && __isString(aString)
         && (__Class(newString) == @global(JISEncodedString))) {
            int _dstIdx = 1, _srcIdx = 1;
            int _sz = __intVal(sz);
            unsigned char *_cp = __stringVal(aString);
            unsigned char _c1, _c2;
            unsigned short *_jcp = (unsigned short *)__stringVal(newString);

            while (_srcIdx <= _sz) {
                int _val;

                _c1 = _cp[_srcIdx-1];
                _srcIdx++;

                if ((_srcIdx <= _sz)
                 && (((_c1 >= 129) && (_c1 <= 159))
                     || ((_c1 >= 224) && (_c1 <= 239)))) {
                    _c2 = _cp[_srcIdx-1];
                    _srcIdx++;
                    if ((_c2 >= 64) && (_c2 <= 252)) {
                        int _adjust, _rowOffs, _cellOffs;
                        int _b1, _b2;

                        _adjust = (_c2 < 159) ? 1 : 0;
                        _rowOffs = (_c1 < 160) ? 112 : 176;
                        if (_adjust) {
                            _cellOffs = 31 + ((_c2 > 127) ? 1 : 0);
                        } else {
                            _cellOffs = 126;
                        }
                        _b1 = ((_c1 - _rowOffs) << 1) - _adjust;
                        _b2 = (_c2 - _cellOffs);
                        _val = (_b1<<8) + _b2;
                        if (_val <= 0) {
                            /* decoder error - let smalltalk handle that */
                            _srcIdx -= 2;
                            goto getOutOfHere;
                        }
                        _jcp[_dstIdx-1] = _val;
                    } else {
                        /* mhmh - append untranslated */

                        _jcp[_dstIdx-1] = _c1;
                        _dstIdx++;
                        _jcp[_dstIdx-1] = _c2;
                    }
                } else {
                    if ((_c1 >= 0xA1 /* 161 */) && (_c1 <= 0xDF /* 223 */)) {
                        /* HALFWIDTH KATAKANA
                         * map half-width katakana to 8E:xx
                         */
                        _val = _c1 - 128;
                        _val = _val + 0x8E00;
                        _jcp[_dstIdx-1] = _val;
                    } else {
                        /* roman characters left untranslated */
                        _jcp[_dstIdx-1] = _c1;
                    }
                }
                _dstIdx++;
            }
        getOutOfHere: ;
            dstIdx = __MKSMALLINT(_dstIdx);
            srcIdx = __MKSMALLINT(_srcIdx);
        }
%}.

        [srcIdx <= sz] whileTrue:[
            "/
            "/ scan for next character in 129..159 or 224..239
            "/
            char1 := aString at:srcIdx.
            srcIdx := srcIdx + 1.
            b1 := char1 asciiValue.

            ((srcIdx <= sz) 
            and:[(b1 >= 129 and:[b1 <= 159])                 "/ SJIS1 81 .. 9F
                 or:[b1 >= 224 and:[b1 <= 239]]]) ifTrue:[   "/       E0 .. EF
                char2 := aString at:srcIdx.
                srcIdx := srcIdx + 1.
                b2 := char2 asciiValue.
                (b2 >= 64 and:[b2 <= 252]) ifTrue:[          "/ SJIS2 40 .. FC
                    |adjust rowOffs cellOffs|

                    adjust := (b2 < 159) ifTrue:[1] ifFalse:[0].
                    rowOffs := b1 < 160 ifTrue:[112] ifFalse:[176].
                    adjust == 1 ifTrue:[
                        cellOffs := 31 + (b2 > 127 ifTrue:[1] ifFalse:[0]).
                    ] ifFalse:[
                        cellOffs := 126.
                    ].
                    b1 := ((b1 - rowOffs) bitShift:1) - adjust.
                    b2 := (b2 - cellOffs).
                    val := (b1 bitShift:8) + b2.
                    val <= 0 ifTrue:[
                        DecodingFailedSignal 
                                raiseWith:aString
                                errorString:'SJIS decoding failed (not SJIS encoded ?)'.
                        newString at:dstIdx put:char1.
                        dstIdx := dstIdx + 1.
                        newString at:dstIdx put:char2.
                    ] ifFalse:[
                        newString at:dstIdx put:(Character value:val).
                    ]
                ] ifFalse:[
                    "/ mhmh - append untranslated

                    newString at:dstIdx put:char1.
                    dstIdx := dstIdx + 1.
                    newString at:dstIdx put:char2.
                ]
            ] ifFalse:[    
                (b1 >= 16rA1 "161" and:[b1 <= 16rDF "223"]) ifTrue:[     "/ HALFWIDTH KATAKANA
                    "/ map half-width katakan to 8E:xx
                    val := b1 - 128.
                    val := val + (16r8E bitShift:8).
                    newString at:dstIdx put:(Character value:val).
                ] ifFalse:[    
                    "/ roman characters left untranslated
                    newString at:dstIdx put:char1
                ]
            ].
            dstIdx := dstIdx + 1.
        ].
        (dstIdx-1) ~~ sz ifTrue:[
            newString := newString copyTo:dstIdx - 1.
        ]
    ].

    ^ newString

    "simple:

         'hello' decodeFrom:#sjis         

         '../../doc/online/japanese/TOP.html' asFilename contents asString
                decodeFrom:#sjis  
    "

    "Created: 28.6.1997 / 19:19:23 / cg"
    "Modified: 4.7.1997 / 11:01:22 / cg"
!

encodeIntoBIG5:aBIG5String
    "return a new string with aBIG5Strings characters as BIG5 encoded 16bit string,
     The argument must be a BIG5String.
     This is used translate 16bit BIG5 strings into the external BIG5 encoded
     representation.
     The resulting string is only useful to be stored on some external file,
     not for being displayed in an ST/X view.

     This is a first-class candidate for a primitive"

    |sz "{ Class: SmallInteger }"
     b  "{ Class: SmallInteger }"
     c out|

    sz := aBIG5String size.
    out := WriteStream on:(String new:(sz * 2)).

    1 to:sz do:[:index |
        c := aBIG5String at:index.
        b := c asciiValue.

        b > 255 ifTrue:[
            "/ big5 character

            out nextPut:(Character value:(b bitShift:-8)).
            out nextPut:(Character value:(b bitAnd:16rFF)).
        ] ifFalse:[
            "/
            "/ control or roman character
            "/
            out nextPut:c
        ].
    ].
    ^ out contents

    "Created: 17.4.1996 / 17:17:50 / cg"
    "Modified: 4.7.1997 / 11:01:57 / cg"
!

encodeIntoEUC:aJISString
    "return a new string with aJISStrings characters as EUC encoded 8bit string.
     The argument must be a JIS 16 bit character string.
     The resulting string is only useful to be stored on some external file,
     not for being displayed in an ST/X view.

     This is a first-class candidate for a primitive"

    |sz "{ Class: SmallInteger }"
     b1 "{ Class: SmallInteger }"
     val "{ Class: SmallInteger }"
     c romans out|

    romans := JISEncodedString romanTable.

    sz := aJISString size.
    sz == 0 ifTrue:[^ ''].

    out := WriteStream on:(String new:(sz * 2)).

    1 to:sz do:[:srcIndex |
        b1 := (c := aJISString at:srcIndex) asciiValue.
        b1 < 161 ifTrue:[
            "/ a control or roman character    
            out nextPut:c.
        ] ifFalse:[
            "/
            "/ check for a roman character
            "/ the two numbers below are romanTable min and romanTable max
            "/
            (b1 between:16r2121 and:16r2573) ifTrue:[
                val := romans indexOf:b1.
                (val ~~ 0 and:[val <= 127]) ifTrue:[
                    out nextPut:(Character value:(val - 1 + 32))
                ] ifFalse:[
                    out nextPut:(Character value:(b1 bitShift:-8) + 128).
                    out nextPut:(Character value:(b1 bitAnd:16rFF) + 128).
                ].
            ] ifFalse:[
                out nextPut:(Character value:(b1 bitShift:-8) + 128).
                out nextPut:(Character value:(b1 bitAnd:16rFF) + 128).
            ]
        ].
    ].
    ^ out contents

    "simple:

         ('hello' decodeFrom:#euc) encodeInto:#euc    
    "

    "Created: 17.4.1996 / 16:13:33 / cg"
    "Modified: 4.7.1997 / 11:03:43 / cg"
!

encodeIntoGB:aGBString
    "return a new string with aGBStrings characters as GB encoded 8bit string.
     The argument must be a GBString.
     This is used translate 16bit GB strings into the external GB encoded
     representation.
     The resulting string is only useful to be stored on some external file,
     not for being displayed in an ST/X view.

     This is a first-class candidate for a primitive"

    |sz "{ Class: SmallInteger }"
     b  "{ Class: SmallInteger }"
     c out|

    sz := aGBString size.
    out := WriteStream on:(String new:(sz * 2)).

    1 to:sz do:[:index |
        c := aGBString at:index.
        b := c asciiValue.
        b > 255 ifTrue:[
            "/ big5 character

            out nextPut:(Character value:(b bitShift:-8)).
            out nextPut:(Character value:(b bitAnd:16rFF)).
        ] ifFalse:[
            "/
            "/ control or roman character
            "/
            out nextPut:c
        ].
    ].
    ^ out contents

    "Created: 30.6.1997 / 15:48:35 / cg"
    "Modified: 4.7.1997 / 11:02:13 / cg"
!

encodeIntoJIS7:aJISString
    "return a new string with aJISStrings characters as JIS7 encoded 7bit string,
     The receiver must be a JIS encoded character string.

     The resulting string is only useful to be stored on some external file,
     not for being displayed in an ST/X view."

    |sz "{ Class:SmallInteger }"
     b1 "{ Class:SmallInteger }"
     val romans out inSingleByteMode c kanji roman val2|

    inSingleByteMode := true.
    kanji := JISEncodedString jis7KanjiEscapeSequence.
    roman := JISEncodedString jis7RomanEscapeSequence.

    romans := JISEncodedString romanTable.

    sz := aJISString size.
    sz == 0 ifTrue:[^ ''].

    out := WriteStream on:(String new:(sz * 2)).

    1 to:sz do:[:srcIndex |
        c := aJISString at:srcIndex.
        b1 := c asciiValue.
        b1 < 33 ifTrue:[
            "/ a control character
            inSingleByteMode ifFalse:[
                out nextPutAll:roman.
                inSingleByteMode := true
            ].
            out nextPut:c.
        ] ifFalse:[
            "/ check for a roman character
            "/ the two numbers below are romanTable min and romanTable max
            "/
            (b1 between:16r2121 and:16r2573) ifTrue:[
                val := romans indexOf:b1.
                val2 := val - 1 + 32.
                (val ~~ 0 and:[val2 <= 16r7F]) ifTrue:[
                    inSingleByteMode ifFalse:[
                        out nextPutAll:roman.
                        inSingleByteMode := true
                    ].
                    out nextPut:(Character value:val2)
                ] ifFalse:[
                    inSingleByteMode ifTrue:[
                        out nextPutAll:kanji.
                        inSingleByteMode := false
                    ].
                    out nextPut:(Character value:(b1 bitShift:-8)).
                    out nextPut:(Character value:(b1 bitAnd:16rFF)).
                ].
            ] ifFalse:[
                b1 <= 255 ifTrue:[
                    "/ mhmh - unrepresentable roman (national chars)
"/                    b1 >= 160 ifTrue:[
"/                        ('no rep for ' , b1 printString) printNL.
"/                    ].
                    EncodingFailedSignal
                        raiseWith:aJISString
                        errorString:'JIS7 encoding failed (contains 8-bit characters ?)'.

                    inSingleByteMode ifFalse:[
                        out nextPutAll:roman.
                        inSingleByteMode := true
                    ].
                    out nextPut:c
                ] ifFalse:[
                    inSingleByteMode ifTrue:[
                        out nextPutAll:kanji.
                        inSingleByteMode := false
                    ].
                    out nextPut:(Character value:(b1 bitShift:-8)).
                    out nextPut:(Character value:(b1 bitAnd:16rFF)).
                ]
            ]
        ].
    ].
    inSingleByteMode ifFalse:[
        out nextPutAll:roman.
    ].
    ^ out contents

    "simple:

         'hello' decodeFromJIS7 encodeIntoJIS7 
    "

    "Created: 17.4.1996 / 16:17:40 / cg"
    "Modified: 16.7.1997 / 11:30:07 / cg"
!

encodeIntoSJIS:aJISString
    "return a new string with aJISStrings characters as SJIS encoded 8bit string.
     The receiver must be a JIS encoded character string.

     The resulting string is only useful to be stored on some external file,
     not for being displayed in an ST/X view."

    |sz "{ Class: SmallInteger }"
     rval "{ Class: SmallInteger }"
     val  "{ Class: SmallInteger }"
     romans c out isSJIS|

    romans := JISEncodedString romanTable.

    sz := aJISString size.
    sz == 0 ifTrue:[^ ''].

    out := WriteStream on:(String new:(sz * 2)).

    1 to:sz do:[:srcIndex |
        val := (c := aJISString at:srcIndex) asciiValue.
        val <= 128 ifTrue:[
            "/ a control or ascii character    
            out nextPut:c.
        ] ifFalse:[
            (val > 150 and:[val < 224]) ifTrue:[
                "/ ascii subset
                out nextPut:c.
            ] ifFalse:[
                "/ should not happen ...
                val <= 255 ifTrue:[
                    out nextPut:c.
                ] ifFalse:[
                    isSJIS := true.

                    "/ check for HALFWIDTH KATAKANA
                    "/ 8E:xx
                    "/ NO: halfwidth katakana no longer generated
                    "/     remains there as full-width katakana

"/                    (val bitAnd:16rFF00) == 16r8E00 ifTrue:[
"/                        |b|
"/
"/                        b := (val bitAnd:16rFF) + 128.
"/                        (b >= 16rA1 "161" and:[b <= 16rDF "223"]) ifTrue:[
"/                            out nextPut:(Character value:b).
"/                            isSJIS := false.
"/                        ].
"/                    ].

                    isSJIS ifTrue:[

                        "/ check for a roman character
                        "/ the two numbers below are romanTable min and romanTable max
                        "/
                        (val between:16r2121 and:16r2573) ifTrue:[
                            rval := romans indexOf:val.
                            rval ~~ 0 ifTrue:[
                                rval := rval - 1 + 32.
                                rval <= 16r7F ifTrue:[ "/ do not translate halfwidth katakana
                                    out nextPut:(Character value:rval).
                                    isSJIS := false.
                                ]
                            ].
                        ].
                    ].

                    isSJIS ifTrue:[
                        |b1 b2 rowOffset cellOffset|

                        b1 := (val bitShift:-8).
                        b2 := (val bitAnd:16rFF).
                        rowOffset := (b1 < 95) ifTrue:[112] ifFalse:[176].
                        cellOffset := b1 odd ifTrue:[(b2 > 95) ifTrue:[32] ifFalse:[31]]
                                             ifFalse:[126].

                        out nextPut:(Character value:(((b1 + 1) bitShift:-1) + rowOffset)).
                        out nextPut:(Character value:b2 + cellOffset).
                    ]
                ]
            ]
        ].
    ].
    ^ out contents

    "simple:

         'hello' decodeFromEUC encodeIntoEUC    
    "

    "Created: 28.6.1997 / 21:13:27 / cg"
    "Modified: 16.7.1997 / 11:30:45 / cg"
!

encodeRomans:aTwoByteString
    "return a new (usually 2-byte) string with roman characters
     encoded. The position of roman characters are encoding specific.
     (see romanTable of concrete 16-bit string classes).
     Used when displaying strings."

    |sz "{ Class: SmallInteger }"
     b  "{ Class: SmallInteger }"
     c i nCode romans newString|

    romans := self romanTable.
    romans isNil ifTrue:[^ aTwoByteString].

    sz := aTwoByteString size.
    newString := self fromString:aTwoByteString.

    1 to:sz do:[:index |
        c := aTwoByteString at:index.
        b := c asciiValue.

        (b > 32 and:[b <= 255]) ifTrue:[
            (i := b - 32 + 1) <= romans size ifTrue:[
                "/
                "/ a roman character
                "/ map to corresponding row
                "/
                nCode := romans at:i.
            ] ifFalse:[
                nCode := self defaultRomanCharacterValue
            ].
            newString at:index put:(Character value:nCode).
        ].
    ].
    ^ newString

    "Created: 30.6.1997 / 15:43:56 / cg"
    "Modified: 30.6.1997 / 21:16:20 / cg"
!

guessEncodingFrom:aString
    "try to guess some 8-bit strings encoding by
     searching for certain escape sequences.
     Returns a string or nil.
     This may not find the strings real encoding."

    |idx ascii|

    "/ look for JIS7 / EUC encoding ...

    (aString findString:(JISEncodedString jisISO2022EscapeSequence)) ~~ 0 ifTrue:[
        ^ #'iso2020-jp'
    ].
    (aString findString:(JISEncodedString jis7KanjiEscapeSequence)) ~~ 0 ifTrue:[
        ^ #jis7
    ].
    (aString findString:(JISEncodedString oldJis7KanjiEscapeSequence)) ~~ 0 ifTrue:[
        ^ #jis7
    ].

    "/ TODO:

"/    "/ look for EUC
"/    idx := aString findFirst:[:char | |ascii|
"/                                        ((ascii := char asciiValue) >= 16rA1)     
"/                                        and:[ascii <= 16rFE]].
"/    idx ~~ 0 ifTrue:[
"/        ascii := (aString at:(idx + 1)) asciiValue.
"/        (ascii >= 16rA1 and:[ascii <= 16rFE]) ifTrue:[
"/            ^ #euc
"/        ]
"/    ].

    "/ look for SJIS ...

    "/ look for UTS ...

    ^ nil

    "Modified: 16.7.1997 / 13:54:59 / cg"
! !

!CharacterArray class methodsFor:'pattern matching'!

matchScan:matchScanArray from:matchStart to:matchStop with:aString from:start to:stop ignoreCase:ignoreCase
    "helper for match; return true if the characters from start to stop in
     aString are matching the scan in matchScan from matchStart to matchStop.
     The matchScan is as created by asMatchScanArray.

     This algorithm is not at all the most efficient; 
     for heavy duty pattern matching, an interface (primitive) to the regex 
     pattern matching package should be added."

    |matchEntry 
     mStart "{ Class: SmallInteger }"
     mStop  "{ Class: SmallInteger }"
     sStart "{ Class: SmallInteger }"
     sStop  "{ Class: SmallInteger }"
     mSize  "{ Class: SmallInteger }"
     sSize  "{ Class: SmallInteger }"
     index  "{ Class: SmallInteger }"
     quickCheck matchLast nextMatchEntry
     checkChar included|

    mStart := matchStart.
    mStop := matchStop.
    sStart := start.
    sStop := stop.

    [true] whileTrue:[
"/ Transcript showCR:('match: ''' , (aString copyFrom:sStart to:sStop) , 
"/                    ''' against:' , (matchScanArray copyFrom:mStart to:mStop) printString).

        mSize := mStop - mStart + 1.
        sSize := sStop - sStart + 1.

        "empty strings match"
        (mSize == 0) ifTrue:[^ (sSize == 0)].

        matchEntry := matchScanArray at:mStart.

        "/ the most common case first:
        (sSize ~~ 0 
        and:[(checkChar := (aString at:sStart)) == matchEntry]) ifTrue:[
            "advance by one and continue"
            mStart := mStart + 1.
            sStart := sStart + 1
        ] ifFalse:[
            (matchEntry == #any) ifTrue:[
                "restString empty -> no match"
                (sSize == 0) ifTrue:[^ false].
                "# matches single character"
                ((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
                "advance by one and continue"
                mStart := mStart + 1.
                sStart := sStart + 1
            ] ifFalse:[
                (matchEntry == #anyString) ifTrue:[
                    "* alone matches anything"
                    (mSize == 1) ifTrue:[^ true].
                    "restString empty & matchString not empty -> no match"
                    (sSize == 0) ifTrue:[^ false].

                    "
                     try to avoid some of the recursion by checking last
                     character and continue with shortened strings if possible
                    "
                    quickCheck := false.
                    (mStop >= mStart) ifTrue:[
                        matchLast := matchScanArray at:mStop.
                        (matchLast ~~ #anyString) ifTrue:[
                            (matchLast == #any) ifTrue:[
                                quickCheck := true
                            ] ifFalse:[
                                matchLast == (aString at:sStop) ifTrue:[
                                    quickCheck := true
                                ] ifFalse:[
                                    matchLast isString ifTrue:[
                                        quickCheck := matchLast includes:(aString at:sStop)
                                    ]
                                ]
                            ]
                        ]
                    ].
                    quickCheck ifTrue:[
                        "
                         quickCheck ok, advance from the right
                        "
                        mStop := mStop - 1.
                        sStop := sStop - 1
                    ] ifFalse:[
                        "/ no quick check; 
                        "/ look for the next character(s)
                        "/ and try matching there
                        "/ (to avoid recursion)

                        mStart < mStop ifTrue:[
                            nextMatchEntry := matchScanArray at:mStart+1.
                            nextMatchEntry isCharacter ifTrue:[
                                sStart <= sStop ifTrue:[
                                    [true] whileTrue:[
                                        ignoreCase ifFalse:[
                                            index := aString indexOf:nextMatchEntry startingAt:sStart
                                        ] ifTrue:[
                                            index := aString findFirst:[:c | c asLowercase == nextMatchEntry asLowercase]
                                                           startingAt:sStart.
                                        ].
                                        (index == 0 or:[index > sStop]) ifTrue:[
                                            ^ false
                                        ].
                                        (self matchScan:matchScanArray 
                                              from:(mStart + 1) 
                                              to:mStop 
                                              with:aString 
                                              from:index 
                                              to:sStop 
                                              ignoreCase:ignoreCase 
                                        ) ifTrue:[
                                            ^ true
                                        ].
                                        sStart := index + 1.
                                    ]
                                ]
                            ]
                        ].

                        "
                         no quick check possible;
                         loop over all possible substrings
                        "
                        index := sStart.
                        [index <= sStop] whileTrue:[
                            (self matchScan:matchScanArray 
                                  from:(mStart + 1) 
                                  to:mStop 
                                  with:aString 
                                  from:index 
                                  to:sStop 
                                  ignoreCase:ignoreCase 
                            ) ifTrue:[
                                ^ true
                            ].
                            index := index + 1
                        ].
                        ^ false
                    ].
                ] ifFalse:[
                    (matchEntry isString) ifTrue:[
                        "testString empty -> no match"
                        (sSize == 0) ifTrue:[^ false].

                        included := false.
                        "/ checkChar := aString at:sStart.
                        included := matchEntry includes:checkChar.
                        included ifFalse:[
                            ignoreCase ifTrue:[
                                checkChar isUppercase ifTrue:[
                                    included := matchEntry includes:checkChar asLowercase.
                                ] ifFalse:[
                                    included := matchEntry includes:checkChar asUppercase.
                                ]
                            ].
                        ].
                        mStart := mStart + 1.
                        mSize := mSize - 1.
                        included ifFalse:[^ false].

                        ((sSize == 1) and:[mSize == 0]) ifTrue:[^ true].
                        "cut off 1st char and continue"
                        sStart := sStart + 1
                    ] ifFalse:[
                        "/ must be single character

                        "testString empty ?"
                        (sSize == 0) ifTrue:[^ false].

                        "first characters equal ?"
                        "/ checkChar := aString at:sStart.
                        ignoreCase ifFalse:[^ false].
                        (checkChar asUppercase ~~ matchEntry asUppercase) ifTrue:[^ false].

                        "advance and continue"
                        mStart := mStart + 1.
                        sStart := sStart + 1
                    ]
                ]
            ]
        ]
    ].

    "
     |scanArray s|

     scanArray := self matchScanArrayFrom:'*hello'.
     s := 'foo bar hello world'.
     CharacterArray
         matchScan:scanArray
         from:1 
         to:scanArray size
         with:s
         from:1
         to:s size
         ignoreCase:false
    "
    "
     |scanArray s|

     scanArray := self matchScanArrayFrom:'*hello*'.
     s := 'foo bar hello world'.
     CharacterArray
         matchScan:scanArray
         from:1 
         to:scanArray size
         with:s
         from:1
         to:s size
         ignoreCase:false
    "

    "Modified: / 15.10.1998 / 13:39:25 / cg"
!

matchScanArrayFrom:aString
    "scan a pattern string and decompose it into a scanArray.
     This is processed faster (especially with character ranges), and
     can also be reused later. (if the same pattern is to be searched again)"

    |coll 
     idx "{ Class: SmallInteger }"
     end c1 c2 matchSet previous|

    coll := OrderedCollection new.
    idx := 1. end := aString size.
    [idx <= end] whileTrue:[
        |char this|

        char := aString at:idx.
        char == $* ifTrue:[
            previous ~~ #anyString ifTrue:[
                this := #anyString
            ]
        ] ifFalse:[
            char == $# ifTrue:[
                previous ~~ #anyString ifTrue:[
                    this := #any
                ]
            ] ifFalse:[
                char == $[ ifTrue:[
                    matchSet := IdentitySet new.
                    idx := idx + 1.
                    idx > end ifTrue:[^ nil].
                    char := aString at:idx.
                    c1 := nil.
                    [char ~~ $]] whileTrue:[
                        ((char == $-) and:[c1 notNil]) ifTrue:[
                            idx := idx + 1.
                            idx > end ifTrue:[^ nil].
                            c2 := aString at:idx.
                            c1 to:c2 do:[:c | matchSet add:c].
                            c1 := nil.
                            idx := idx + 1.
                        ] ifFalse:[
                            (char ~~ $]) ifTrue:[
                                matchSet add:char.
                                c1 := char.
                                idx := idx + 1
                            ]
                        ].
                        idx > end ifTrue:[^ nil].
                        char := aString at:idx
                    ].
                    this := matchSet asString
                ] ifFalse:[
                    char == $\ ifTrue:[
                        idx := idx + 1.
                        idx > end ifTrue:[
                            "/ mhmh - what should we do here ?
                            this := char
                        ] ifFalse:[
                            this := aString at:idx.
                        ]
                    ] ifFalse:[
                        this := char
                    ]
                ]
            ]
        ].
        this notNil ifTrue:[coll add:this. previous := this].
        idx := idx + 1
    ].

    ^ coll asArray

    "
     String matchScanArrayFrom:'*ute*'  
     String matchScanArrayFrom:'**ute**'  
     String matchScanArrayFrom:'*uter'   
     String matchScanArrayFrom:'\*uter'   
     String matchScanArrayFrom:'[cC]#mpute[rR]'  
     String matchScanArrayFrom:'[abcd]*'      
     String matchScanArrayFrom:'[a-k]*'      
     String matchScanArrayFrom:'*some*compl*ern*' 
     String matchScanArrayFrom:'[a-'  
     String matchScanArrayFrom:'[a-zA-Z]'  
     String matchScanArrayFrom:'[a-z01234A-Z]'  
    "

    "Modified: 2.4.1997 / 16:20:29 / cg"
! !

!CharacterArray methodsFor:'Compatibility - ST/V'!

byteAt:index put:aByte
    "store a byte at given index.
     This is an ST/V compatibility method."

"/    (aByte == 0) ifTrue:[
"/        "store a space instead"
"/        ^ super basicAt:index put:(Character space)
"/    ].
    ^ super at:index put:(Character value:aByte)

    "
     'hello' copy at:1 put:$H asciiValue; yourself
     'hello' copy byteAt:1 put:72; yourself 
     'hello' copy byteAt:1 put:0; yourself 
    "

    "Modified: 6.5.1996 / 10:35:26 / cg"
!

replChar:oldChar with:newChar
    "return a copy of the receiver, with all oldChars replaced
     by newChar.
     This is an ST/V compatibility method."

    ^ self copyReplaceAll:oldChar with:newChar

    "
     '12345678901234567890' replChar:$0 with:$* 
    "

    "Modified: / 18.7.1998 / 22:52:57 / cg"
!

replChar:oldChar withString:newString
    "return a copy of the receiver, with all oldChars replaced
     by newString (i.e. slice in the newString in place of the oldChar).
     This is an ST/V compatibility method."

    |tmpStream|

    tmpStream := WriteStream on:(self class new).
    self do:[:element |
	element = oldChar ifTrue:[
	    tmpStream nextPutAll:newString
	] ifFalse:[
	    tmpStream nextPut:element 
	].
    ].
    ^ tmpStream contents

   "
     '12345678901234567890' replChar:$0 withString:'foo' 
     'a string with spaces' replChar:$  withString:' foo '  
    "
!

replString:subString withString:newString
    "return a copy of the receiver, with all sequences of subString replaced
     by newString (i.e. slice in the newString in place of the oldString)."

    |tmpStream idx idx1|

    tmpStream := WriteStream on:(self class new).
    idx := 1.
    [idx ~~ 0] whileTrue:[
        idx1 := idx.
        idx := self indexOfSubCollection:subString startingAt:idx.
        idx ~~ 0 ifTrue:[
            tmpStream nextPutAll:(self copyFrom:idx1 to:idx-1).
            tmpStream nextPutAll:newString.
            idx := idx + subString size
        ]
    ].
    tmpStream nextPutAll:(self copyFrom:idx1).
    ^ tmpStream contents

   "
     '12345678901234567890' replString:'123' withString:'OneTwoThree' 
     '12345678901234567890' replString:'123' withString:'*' 
     '12345678901234567890' replString:'234' withString:'foo' 

     ('a string with spaces' replChar:$  withString:' foo ')
        replString:'foo' withString:'bar'
    "

    "Modified: / 31.5.1999 / 12:33:59 / cg"
!

trimBlanks
    "return a copy of the receiver without leading
     and trailing spaces.
     This is an ST/V compatibility method."

    ^ self withoutSpaces

    "
     '    spaces at beginning' trimBlanks     
     'spaces at end    ' trimBlanks           
     '    spaces at beginning and end     ' trimBlanks    
     'no spaces' trimBlanks              
    "
! !

!CharacterArray methodsFor:'Compatibility - ST80'!

expandMacros
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:#()

    "
     'hello<n>foo' expandMacros
    "

    "Modified: / 18.6.1998 / 16:03:02 / cg"
!

expandMacrosWith:arg
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:(Array with:arg)

    "Created: / 1.11.1997 / 13:01:28 / cg"
    "Modified: / 1.11.1997 / 13:30:50 / cg"
!

expandMacrosWith:arg1 with:arg2
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:(Array with:arg1 with:arg2)

    "Modified: / 6.7.1998 / 21:58:14 / cg"
!

expandMacrosWithArguments:argArray
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     WARNING: possibly not all ST80 expansions are supported here."

    "/ supported expansions:
    "/
    "/   <#p>   # is arg Number; slice in the args printString
    "/   <#s>   # is arg Number; slice in the arg itself (must be a String)
    "/   <n>    replace by a newLine character
    "/   <t>    replace by a tab character

    |in out c fmt nr|

    in := self readStream.
    out := '' writeStream.

    [in atEnd] whileFalse:[
        c := in next.
        c == $< ifTrue:[
            in peek == $n ifTrue:[
                out nextPut:Character cr.
                in next
            ] ifFalse:[in peek == $t ifTrue:[
                out nextPut:Character tab.
                in next
            ] ifFalse:[
                "/ start an argument expansion ...
                nr := Integer readFrom:in onError:nil.
                nr isNil ifTrue:[
                    "/ what does VW do here ?
                    self halt:'invalid format'.
                    ^ self
                ] ifFalse:[
                    (nr between:1 and:argArray size) ifFalse:[
                        "/ what does VW do here ?
                        self halt:'invalid format'.
                        ^ self
                    ] ifTrue:[
                        fmt := in next.
                        (fmt == $p) ifTrue:[
                            "/ expand with args printString
                            out nextPutAll:(argArray at:nr) printString.
                        ] ifFalse:[
                            (fmt == $s) ifTrue:[
                                "/ expand with arg itself
                                out nextPutAll:(argArray at:nr).
                            ] ifFalse:[
                                "/ what does VW do here ?
                                self halt:'invalid format'.
                                ^ self
                            ]
                        ]
                    ]
                ].
            ]].
            c := in next.
            c ~~ $> ifTrue:[
                "/ what does VW do here ?
                self halt:'invalid format'.
                ^ self
            ]
        ] ifFalse:[
            out nextPut:c
        ].
    ].
    ^ out contents

    "
     'hello <1p> how are you' expandMacrosWith:(OperatingSystem getLoginName)
    "

    "Modified: / 18.6.1998 / 16:04:46 / cg"
! !

!CharacterArray methodsFor:'Compatibility - Squeak'!

capitalized
    ^ self asUppercaseFirst

    "
     'hello' capitalized
    "
!

findTokens:delimiters
    ^ self asCollectionOfSubstringsSeparatedByAny:delimiters
!

includesSubString:aString
    "return true, if a substring is contained in the receiver.
     The compare is case sensitive."

    ^ self includesString:aString

    "
     'hello world' includesSubString:'Hel'  
     'hello world' includesSubString:'hel'  
     'hello world' includesSubString:'llo'  
    "



!

includesSubstring:aString caseSensitive:caseSensitive
    "return true, if a substring is contained in the receiver.
     The argument, caseSensitive controls if case is ignored in the compare."

    "/ for now,  a q&d hack ...

    caseSensitive ifFalse:[
        ^ self asLowercase includesString:aString asLowercase
    ].
    ^ self includesString:aString

    "
     'hello world' includesSubstring:'Hel' caseSensitive:true  
     'hello world' includesSubstring:'Hel' caseSensitive:false 
    "



!

lastSpacePosition
    ^ self lastIndexOfSeparator
!

withBlanksTrimmed
    "Return a copy of the receiver from which leading and trailing blanks have been trimmed."

    ^ self withoutSpaces

    "
     '  hello    world    ' withBlanksTrimmed  
    "



!

withNoLineLongerThan: aNumber
    "Answer a string with the same content as receiver, but rewrapped so that no line has more characters than the given number"

    | listOfLines currentLast currentStart resultString putativeLast putativeLine crPosition |

    aNumber isNumber not | (aNumber < 1) ifTrue: [self error: 'too narrow'].
    listOfLines _ OrderedCollection new.
    currentLast _ 0.
    [currentLast < self size] whileTrue:
            [currentStart _ currentLast + 1.
            putativeLast _ (currentStart + aNumber - 1) min: self size.
            putativeLine _ self copyFrom: currentStart to: putativeLast.
            (crPosition _ putativeLine indexOf: Character cr) > 0 ifTrue:
                    [putativeLast _ currentStart + crPosition - 1.
                    putativeLine _ self copyFrom: currentStart to: putativeLast].
            currentLast _ putativeLast == self size
                    ifTrue:
                            [putativeLast]
                    ifFalse:
                            [currentStart + putativeLine lastSpacePosition - 1].
            currentLast <= currentStart ifTrue:
                    ["line has NO spaces; baleout!!"
                    currentLast _ putativeLast].
            listOfLines add: (self copyFrom: currentStart to: currentLast) withBlanksTrimmed].

    listOfLines size > 0 ifFalse: [^ ''].
    resultString _ listOfLines first.
    2 to: listOfLines size do:
            [:i | resultString _ resultString, Character cr asString, (listOfLines at: i)].
    ^ resultString

    "
     #(5 7 20) collect:
        [:i | 'Fred the bear went down to the brook to read his book in silence' withNoLineLongerThan: i]
    "



! !

!CharacterArray methodsFor:'Compatibility - V''Age'!

addLineDelimiter
    "replace all '\'-characters by line delimiter (cr) - characters.
     This has been added for VisualAge compatibility."

    ^ self withCRs
!

bindWith:aString
    "return a copy of the receiver, where a '%1' escape is
     replaced by aString.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:aString)

    "
     'do you like %1 ?' bindWith:'smalltalk'
    "
!

bindWith:string1 with:string2
    "return a copy of the receiver, where a '%1' escape is
     replaced by string1 and '%2' is replaced by string2.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:string1 with:string2)

    "
     'do you prefer %1 or rather %2 ?'
	bindWith:'smalltalk' with:'c++'
    "
!

bindWith:str1 with:str2 with:str3
    "return a copy of the receiver, where a '%1', '%2' and '%3' escapes
     are replaced by str1, str2 and str3 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3)

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWith:'smalltalk' with:'c++' with:'c'
    "
!

bindWith:str1 with:str2 with:str3 with:str4
    "return a copy of the receiver, where a '%1', '%2', '%3' and '%4' escapes
     are replaced by str1, str2, str3 and str4 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3 with:str4)

    "
     'do you prefer %1 or rather %2 (not talking about %3 or even %4) ?'
	bindWith:'smalltalk' with:'c++' with:'c' with:'assembler'
    "
!

bindWith:str1 with:str2 with:str3 with:str4 with:str5
    "return a copy of the receiver, where a '%1' .. '%5' escapes
     are replaced by str1 .. str5 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3 with:str4 with:str5)

    "Created: 31.1.1997 / 16:25:42 / cg"
!

bindWithArguments:anArrayOfStrings
    "return a copy of the receiver, where a '%i' escape
     is replaced by the coresponding string from the argument array.
     'i' may be between 1 and 9 (i.e. a maximum of 9 placeholders is allowed).
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:anArrayOfStrings

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWithArguments:#('smalltalk' 'c++' 'c')
    "
!

subStrings
    "return an array consisting of all words contained in the receiver.
     Words are separated by whitespace.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfWords

    "
     'hello world, this is smalltalk' subStrings
    "
!

subStrings:separatorCharacter
    "return an array consisting of all words contained in the receiver.
     Words are separated by separatorCharacter.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfSubstringsSeparatedBy:separatorCharacter

    "
     'foo:bar:baz:smalltalk' subStrings:$:
    "
!

trimSeparators
    "return a copy of the receiver without leading and trailing whiteSpace"

    ^ self withoutSeparators
! !

!CharacterArray methodsFor:'character searching'!

includesMatchCharacters
    "return true if the receiver includes any meta characters (i.e. $* or $#) 
     for match operations; false if not.
     Here, do not care for $\ escapes"

    ^ self includesAny:'*#['

    "
     '*foo' includesMatchCharacters   
     '\*foo' includesMatchCharacters  
     '\*foo' includesUnescapedMatchCharacters  
     '*foo' includesMatchCharacters   
     '\\*foo' includesMatchCharacters
     'foo*' includesMatchCharacters
     'foo\*' includesMatchCharacters
     'foo\' includesMatchCharacters
    "

    "Modified: 2.4.1997 / 18:12:34 / cg"
!

includesUnescapedMatchCharacters
    "return true if the receiver really includes any meta characters (i.e. $* or $#) 
     for match operations; false if not.
     Here, care for $\ escapes"

    |idx sz|

    idx := 1.
    sz := self size.
    [true] whileTrue:[
        idx := self indexOfAny:'*#[\' startingAt:idx.
        idx == 0 ifTrue:[^ false].
        (self at:idx) == $\ ifFalse:[^ true].
        idx := idx + 2.
        idx > sz ifTrue:[^ false].
    ].    

    "
     '*foo' includesUnescapedMatchCharacters
     '\*foo' includesUnescapedMatchCharacters
     '*foo' includesUnescapedMatchCharacters
     '\\*foo' includesUnescapedMatchCharacters
     'foo*' includesUnescapedMatchCharacters
     'foo\*' includesUnescapedMatchCharacters
     'foo\' includesUnescapedMatchCharacters
    "

    "Modified: 2.4.1997 / 17:08:52 / cg"
    "Created: 2.4.1997 / 17:23:26 / cg"
!

indexOfControlCharacterStartingAt:startIndex
    "return the index of the next control character;
     starting the search at startIndex, searching forward;
     that is a character with asciiValue < 32.
     Return 0 if none is found."

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
        (self at:index) isControlCharacter ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' asTwoByteString            indexOfControlCharacterStartingAt:1
     'hello world\foo' withCRsasTwoByteString indexOfControlCharacterStartingAt:1
    "

    "Modified: / 21.7.1998 / 17:25:07 / cg"
!

indexOfNonSeparatorStartingAt:startIndex
    "return the index of the next non-whitespace character,
     starting the search at startIndex, searching forward;
     return 0 if no non-separator was found"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
        (self at:index) isSeparator ifFalse:[^ index]
    ].
    ^ 0

    "
     '    hello world' indexOfNonSeparatorStartingAt:1 
     '    ' indexOfNonSeparatorStartingAt:1            
     'a   ' indexOfNonSeparatorStartingAt:2            
    "

    "
     |s index1 index2|
     s := '   foo    bar      baz'.
     index1 := s indexOfNonSeparatorStartingAt:1.
     index2 := s indexOfSeparatorStartingAt:index1.
     s copyFrom:index1 to:index2 - 1
    "
!

indexOfSeparator
    "return the index of the first whitespace character;
     starting the search at the beginning, searching forward;
     return 0 if no separator was found"

    ^ self indexOfSeparatorStartingAt:1

    "
     'hello world' indexOfSeparator 
     'helloworld' indexOfSeparator  
     'hello   ' indexOfSeparator    
     '   hello' indexOfSeparator    
    "
!

indexOfSeparatorStartingAt:startIndex
    "return the index of the next whitespace character,
     starting the search at startIndex, searching forward;
     return 0 if no separator was found"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
        (self at:index) isSeparator ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' indexOfSeparatorStartingAt:3   
     ' hello world' indexOfSeparatorStartingAt:3  
     'hello world ' indexOfSeparatorStartingAt:3  
     'hello world ' indexOfSeparatorStartingAt:6  
     'hello world ' indexOfSeparatorStartingAt:7  
     'helloworld ' indexOfSeparatorStartingAt:7   
     'helloworld' indexOfSeparatorStartingAt:7   
    "
!

lastIndexOfSeparator
    "return the last index of the whitespace character.
     (i.e. start the search at the end and search backwards);
     Returns 0 if no separator is found."

    ^ self lastIndexOfSeparatorStartingAt:(self size)

    "
     'hello world' lastIndexOfSeparator  
     'helloworld' lastIndexOfSeparator      
     'hel lo wor ld' lastIndexOfSeparator   
     'hel   ' lastIndexOfSeparator 6  
    "
!

lastIndexOfSeparatorStartingAt:startIndex
    "return the index of the previous whitespace character,
     starting the search at startIndex (and searching backwards);
     returns 0 if no separator was found"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.

    start to:1 by:-1 do:[:index |
        (self at:index) isSeparator ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' lastIndexOfSeparatorStartingAt:3
     'hello world' lastIndexOfSeparatorStartingAt:7
     'helloworld' lastIndexOfSeparatorStartingAt:7 
     ' helloworld' lastIndexOfSeparatorStartingAt:7 
    "
! !

!CharacterArray methodsFor:'comparing'!

< something
    "Compare the receiver with the argument and return true if the
     receiver is less than the argument. Otherwise return false.
     This comparison is based on the elements ascii code - 
     i.e. upper/lowercase & national characters are NOT treated specially."

    ^ (something > self)

    "Modified: 22.4.1996 / 15:54:54 / cg"
!

<= something
    "Compare the receiver with the argument and return true if the
     receiver is less than or equal to the argument. Otherwise return false.
     This comparison is based on the elements ascii code - 
     i.e. upper/lowercase & national characters are NOT treated specially."

    ^ (self > something) not

    "Modified: 22.4.1996 / 15:54:56 / cg"
!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.

     This compare does NOT ignore case differences, 
     therefore 'foo' = 'Foo' will return false.
     Since this is incompatible to ST-80 (at least, V2.x) , this may change."

    |mySize    "{ Class: SmallInteger }"
     otherSize |

    aString species == self species ifFalse:[^ false].
    mySize := self size.
    otherSize := aString size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
        (self at:index) = (aString at:index) ifFalse:[^ false].
    ].
    ^ true

    "
     'foo' = 'Foo'  
     'foo' = 'bar'  
     'foo' = 'foo'   
    "

    "Modified: 22.4.1996 / 15:53:58 / cg"
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     This comparison is based on the elements ascii code - 
     i.e. upper/lowercase & upper/lowercase & national characters are NOT treated specially."

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }" 
     n         "{ Class: SmallInteger }" 
     c1 c2|

    mySize := self size.
    otherSize := aString size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
        c1 := self at:index.
        c2 := aString at:index.
        c1 > c2 ifTrue:[^ true].
        c1 < c2 ifTrue:[^ false].
    ].
    ^ mySize > otherSize

    "Modified: 22.4.1996 / 15:55:00 / cg"
!

>= something
    "Compare the receiver with the argument and return true if the
     receiver is greater than or equal to the argument.
     Otherwise return false.
     This comparison is based on the elements ascii code - 
     i.e. upper/lowercase & national characters are NOT treated specially."

    ^ (something > self) not

    "Modified: 22.4.1996 / 15:55:03 / cg"
!

compareWith:aString
    "Compare the receiver with the argument and return 1 if the receiver is
     greater, 0 if equal and -1 if less than the argument. 
     This comparison is based on the elements ascii code - 
     i.e. upper/lowercase & national characters are NOT treated specially.
     'foo' compareWith: 'Foo' will return 1.
     while 'foo' sameAs:'Foo' will return true"

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }" 
     n         "{ Class: SmallInteger }" 
     c1 c2|

    mySize := self size.
    otherSize := aString size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
        c1 := self at:index.
        c2 := aString at:index.
        c1 > c2 ifTrue:[^ 1].
        c1 < c2 ifTrue:[^ -1].
    ].
    mySize > otherSize ifTrue:[^ 1].
    mySize < otherSize ifTrue:[^ -1].
    ^ 0

    "Modified: 22.4.1996 / 15:56:07 / cg"
!

hash
    "return an integer useful as a hash-key"

%{  /* NOCONTEXT */

    REGISTER unsigned g, val;
    REGISTER unsigned char *cp, *cp0;
    int l;

    cp = __stringVal(self);
    l = __stringSize(self);
    if (__qClass(self) != @global(String)) {
        int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(__qClass(self))->c_ninstvars));

        cp += n;
        l -= n;
    }

    /*
     * this is the dragon-book algorithm 
     */

    val = 0;
    switch (l) {
    default:
        for (cp0 = cp, cp += l - 1; cp >= cp0; cp--) {
            val = (val << 4) + *cp;
            if (g = (val & 0xF0000000)) {
                val ^= g >> 24;
                val ^= g;
            }
        }
        break;
    case 7:
        val = cp[6] << 4;
    case 6:
        val = (val + cp[5]) << 4;
    case 5:
        val = (val + cp[4]) << 4;
    case 4:
        val = (val + cp[3]) << 4;
    case 3:
        val = (val + cp[2]) << 4;
    case 2:
        val = (val + cp[1]) << 4;
    case 1:
        val = val + cp[0];
    case 0:
        break;
    }

    /*
     * multiply by large prime to spread values
     * This speeds up Set and Dictionary by a factor of 10!
     */
    val *= 31415821;
    RETURN ( __MKSMALLINT(val & 0x3fffffff));
%}

!

sameAs:aString
    "Compare the receiver with the argument like =, but ignore
     case differences. 
     Return true or false."

    |mySize "{ Class: SmallInteger }"
     otherSize c1 c2|

    self == aString ifTrue:[^ true].

    mySize := self size.
    otherSize := aString size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
        c1 := self at:index.
        c2 := aString at:index.
        c1 == c2 ifFalse:[
            c1 asLowercase = c2 asLowercase ifFalse:[^ false].
        ]
    ].
    ^ true

    "
     'foo' sameAs: 'Foo'   
     'foo' sameAs: 'bar' 
     'foo' sameAs: 'foo'   
    "

    "Modified: 22.4.1996 / 15:56:17 / cg"
!

sameCharacters:aString
    "count & return the number of characters which are the same
     (ignoring case) in the receiver and the argument, aString."

    |n "{ Class: SmallInteger }"
     c1 c2 cnt|

    n := self size.
    n := n min:(aString size).

    cnt := 0.
    1 to:n do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	((c1 == c2)
	or:[c1 asLowercase = c2 asLowercase]) ifTrue:[
	    cnt := cnt + 1
	]
    ].
    ^ cnt

    "
     'foobarbaz' sameCharacters: 'foo'   
     'foobarbaz' sameCharacters: 'Foo'   
     'foobarbaz' sameCharacters: 'baz'   
    "
! !

!CharacterArray methodsFor:'converting'!

asArrayOfSubstrings
    "return an array of substrings from the receiver, interpreting
     separators (i.e. spaces & newlines) as word-delimiters.
     This is a compatibility method - the actual work is done in
     asCollectionOfWords."

    ^ self asCollectionOfWords asArray

    "
     '1 one two three four 5 five' asArrayOfSubstrings  
    "
!

asCollectionOfLines
    "return a collection containing the lines (separated by cr) 
     of the receiver. If multiple cr's occur in a row, the result will
     contain empty strings."

    ^ self asCollectionOfSubstringsSeparatedBy:Character cr

    "
     '1 one\2 two\3 three\4 four\5 five' withCRs asCollectionOfLines
     '1 one\\\\2 two\3 three' withCRs asCollectionOfLines  
    "
!

asCollectionOfSubstringsSeparatedBy:aCharacter
    "return a collection containing the lines (separated by aCharacter) 
     of the receiver. If aCharacter occurs multiple times in a row, 
     the result will contain empty strings."

    |lines myClass
     numberOfLines "{ Class:SmallInteger }"
     startIndex    "{ Class:SmallInteger }"
     stopIndex     "{ Class:SmallInteger }" |

    "
     count first, to avoid regrowing of the OC
    "
    numberOfLines := (self occurrencesOf:aCharacter) + 1.
    lines := OrderedCollection new:numberOfLines.
    myClass := self species.

    startIndex := 1.
    1 to:numberOfLines do:[:lineNr |
	stopIndex := self indexOf:aCharacter startingAt:startIndex.
	stopIndex == 0 ifTrue:[
	    stopIndex := self size
	] ifFalse: [
	    stopIndex := stopIndex - 1.
	].

	(stopIndex < startIndex) ifTrue: [
	    lines add:(myClass new:0)
	] ifFalse: [
	    lines add:(self copyFrom:startIndex to:stopIndex)
	].
	startIndex := stopIndex + 2
    ].
    ^ lines

    "
     '1 one:2 two:3 three:4 four:5 five' withCRs asCollectionOfSubstringsSeparatedBy:$: 
     '1 one 2 two 3 three 4 four 5 five' withCRs asCollectionOfSubstringsSeparatedBy:Character space
    "
!

asCollectionOfSubstringsSeparatedByAny:aCollectionOfSeparators
    "return a collection containing the words (separated by any character
     from aCollectionOfSeparators) of the receiver.
     This allows breaking up strings using any character as separator."

    |words
     start  "{ Class:SmallInteger }" 
     stop   "{ Class:SmallInteger }" 
     mySize "{ Class:SmallInteger }"|

    words := OrderedCollection new.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	"skip multiple separators"
	[aCollectionOfSeparators includes:(self at:start)] whileTrue:[
	    start := start + 1 .
	    start > mySize ifTrue:[
		^ words
	    ].
	].

	stop := self indexOfAny:aCollectionOfSeparators startingAt:start.
	stop == 0 ifTrue:[
	    words add:(self copyFrom:start to:mySize).
	    ^ words
	].
	words add:(self copyFrom:start to:(stop - 1)).
	start := stop
    ].
    ^ words

    "
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:#($:)
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:':' 
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:(Array with:$: with:Character space) 
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:': ' 
     'h1e2l3l4o' asCollectionOfSubstringsSeparatedByAny:($1 to: $9) 
    "
!

asCollectionOfWords
    "return a collection containing the words (separated by whitespace) 
     of the receiver. Multiple occurences of whitespace characters will
     be treated like one - i.e. whitespace is skipped."

    |words
     start  "{ Class:SmallInteger }" 
     stop   "{ Class:SmallInteger }" 
     mySize "{ Class:SmallInteger }"|

    words := OrderedCollection new.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	start := self indexOfNonSeparatorStartingAt:start.
	start == 0 ifTrue:[
	    ^ words
	].
	stop := self indexOfSeparatorStartingAt:start.
	stop == 0 ifTrue:[
	    words add:(self copyFrom:start to:mySize).
	    ^ words
	].
	words add:(self copyFrom:start to:(stop - 1)).
	start := stop
    ].
    ^ words

    "
     'hello world isnt this nice' asCollectionOfWords
     '    hello    world   isnt   this   nice  ' asCollectionOfWords
     'hello' asCollectionOfWords
     '' asCollectionOfWords
     '      ' asCollectionOfWords
    "
!

asComposedText
    "ST-80 compatibility 
     - ST/X does not (as today) support composedTexts."

    ^ ComposedText fromString:self

    "Modified: 27.4.1996 / 13:30:30 / cg"
!

asFilename
    "return a Filename with pathname taken from the receiver"

    ^ Filename named:self string

    "Modified: 20.5.1996 / 09:38:15 / cg"
!

asFixedPoint
    "read a fixedPoint number from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ FixedPoint readFromString:self

    "
     '0.123' asFixedPoint    
     '12345' asFixedPoint    
     '(1/5)' asFixedPoint
     'foo' asFixedPoint
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asFixedPoint] 
    "

    "Modified: / 25.10.1997 / 15:19:00 / cg"
!

asFixedPoint:scale
    "read a fixedPoint number with scale number of post-decimal digits
     from the receiver. Scale controls the number of displayed digits,
     not the number of actually valid digits.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ (FixedPoint readFromString:self) scale:scale

    "
     '0.123' asFixedPoint:2     
     '123456' asFixedPoint:2    
     ('3.14157' asFixedPoint:1) asFixedPoint:5    
    "

    "Modified: / 25.10.1997 / 15:21:57 / cg"
!

asFloat
    "read a float number from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ (Number readFromString:self) asFloat

    "
     '0.123' asFloat 
     '12345' asFloat
     '(1/5)' asFloat
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asFloat] 
    "
!

asInteger
    "read an integer from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ Integer readFromString:self

    "
     '12345678901234567890' asInteger
     '-1234' asInteger
     '0.123' asInteger   <- reader stops at ., returning 0 here
     '0.123' asNumber    <- returns what you expect
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asInteger] 
    "
!

asLowercase
    "return a copy of myself in lowercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
	newStr at:i put:(self at:i) asLowercase
    ].
    ^ newStr

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asLowercaseFirst
    "return a copy of myself where the first character is
     converted to lowercase."

    |newString sz|

    sz := self size.
    newString := self copyFrom:1 to:sz.
    sz > 0 ifTrue:[
	newString at:1 put:(newString at:1) asLowercase
    ].
    ^ newString

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asNumber
    "read a number from the receiver.
     Notice, that (in contrast to ST-80) errors may occur during the read, 
     so you better setup some signal handler when using this method.
     This may change if ANSI specifies it."

"/ ST-80 behavior:
"/  ^ Number readFromString:self onError:0

    ^ Number readFromString:self

    "
     '123'     asNumber
     '123.567' asNumber
     '(5/6)'   asNumber
     'foo'     asNumber
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asNumber] 
    "
!

asNumberFromFormatString:ignored
    "read a number from the receiver, ignoring any nonDigit characters.
     This is typically used to convert from strings which include
     dollar-signs or millenium digits. However, this method also ignores
     the decimal point (if any) and therefore should be used with care."

    |tempString|

    tempString := self collect:[:char | char isDigit].
    ^ Number readFromString:tempString onError:0

    "
     'USD 123' asNumberFromFormatString:'foo'
     'DM 123'  asNumberFromFormatString:'foo'
     '123'     asNumberFromFormatString:'foo'
     '123.567' asNumberFromFormatString:'foo'
     '(5/6)'   asNumberFromFormatString:'foo'
     'foo'     asNumberFromFormatString:'foo'
    "
!

asSingleByteString
    "return the receiver converted to a 'normal' string"

    ^ String fromString:self
!

asSingleByteStringReplaceInvalidWith:replacementCharacter
    "return the receiver converted to a 'normal' string,
     with invalid characters replaced by replacementCharacter.
     Can be used to convert from 16-bit strings to 8-bit strings
     and replace characters above code-255 with some replacement."

    |newString|

    newString := String new:(self size).
    1 to:self size do:[:idx |
        |char|

        char := self at:idx.
        char asciiValue <= 16rFF ifTrue:[
            newString at:idx put:char
        ] ifFalse:[
            newString at:idx put:replacementCharacter
        ].
    ].
    ^ newString

    "Created: 30.6.1997 / 13:02:14 / cg"
!

asString
    "return myself - I am a string"

    ^ self
!

asStringCollection
    "return a collection of lines from myself."

    ^ StringCollection from:self "string"

    "Modified: 13.5.1996 / 20:36:59 / cg"
!

asSymbol
    "return a unique symbol with the name taken from my characters.
     The receiver must be a singleByte-String; twoByteSymbols are
     (currently) not allowed."

    ^ self string asSymbol

    "Created: 22.5.1996 / 15:56:11 / cg"
!

asSymbolIfInterned
    "if a symbol with the receivers characters is already known, return it.
     Otherwise, return nil. This can be used to query for an existing
     symbol and is the same as
        self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil]
     but slightly faster, since the symbol lookup operation is only
     performed once.
     The receiver must be a singleByte-String; twoByteSymbols are
     (currently) not allowed."

    ^ self string asSymbolIfInterned

    "Created: 22.5.1996 / 16:37:04 / cg"
!

asText
    "return a Text-object (collection of lines) from myself."

    Text isNil ifTrue:[^ self].
    ^ Text fromString:self

    "Created: 12.5.1996 / 10:41:14 / cg"
!

asTwoByteString
    "return the receiver converted to a two-byte string"

    ^ TwoByteString fromString:self
!

asUppercase
    "return a copy of myself in uppercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
	newStr at:i put:(self at:i) asUppercase
    ].
    ^ newStr

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
    "
!

asUppercaseFirst
    "return a copy of myself where the first character is
     converted to uppercase."

    |newString sz|

    sz := self size.
    newString := self copyFrom:1 to:sz.
    sz > 0 ifTrue:[
	newString at:1 put:(newString at:1) asUppercase
    ].
    ^ newString

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
     'HelloWorld' asUppercaseFirst   
    "
!

string
    "return the receiver - for ST-80 compatibility"

    ^ self

    "Modified: 27.4.1996 / 13:29:30 / cg"
!

tokensBasedOn:aCharacter
    "this is an ST-80 alias for the ST/X method
	asCollectionOfSubstringsSeparatedBy:"

    ^ self asCollectionOfSubstringsSeparatedBy:aCharacter

    "
     'hello:world:isnt:this nice' tokensBasedOn:$:
     'foo,bar,baz' tokensBasedOn:$,
     '/etc/passwd' asFilename readStream nextLine tokensBasedOn:$:
    "
! !

!CharacterArray methodsFor:'copying'!

, aStringOrCharacter
    "redefined to allow characters and mixed strings to be appended.
     This is nonStandard, but convenient"

    |myWidth otherWidth|

    aStringOrCharacter isCharacter ifTrue:[
        ^ self , aStringOrCharacter asString
    ].
    aStringOrCharacter isText ifTrue:[
        ^ aStringOrCharacter concatenateFromString:self
    ].
    aStringOrCharacter isString ifTrue:[
        (otherWidth := aStringOrCharacter bitsPerCharacter) ~~ (myWidth := self bitsPerCharacter) ifTrue:[
            otherWidth > myWidth ifTrue:[
                ^ (aStringOrCharacter species fromString:self) , aStringOrCharacter
            ].
            ^ self , (self species fromString:aStringOrCharacter)
        ].
    ].
    ^ super , aStringOrCharacter

    "
     'hello' , $1    
     'hello' , '1'   
     'hello' , (' world' asText allBold) 
     'hello' , (JISEncodedString fromString:' world') 
     (JISEncodedString fromString:'hello') , ' world'

     Transcript showCR:
         (Text string:'hello' emphasis:#italic) , (Text string:' world' emphasis:#bold) 
    "

    "Modified: 28.6.1997 / 00:13:17 / cg"
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string."

    ^ self , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string."

    ^ self , string1 , string2 , string3
! !

!CharacterArray methodsFor:'displaying'!

displayOn:aGC x:x y:y from:start to:stop
    "display the receiver on a GC"

    "q&d hack"

    (self copyFrom:start to:stop) displayOn:aGC x:x y:y opaque:false

    "Modified: 12.5.1996 / 12:49:33 / cg"
!

displayOn:aGc x:x y:y opaque:opq
    "display the receiver in a graphicsContext - this method allows
     strings to be used like DisplayObjects."

    |s|

    s := self string.
    opq ifTrue:[
        aGc displayOpaqueString:s x:x y:y.
    ] ifFalse:[
        aGc displayString:s x:x y:y.
    ].

    "Modified: 11.5.1996 / 14:42:48 / cg"
!

displayOpaqueOn:aGC x:x y:y from:start to:stop
    "display the receiver on a GC"

    "q&d hack"

    (self copyFrom:start to:stop) displayOn:aGC x:x y:y opaque:true

    "Created: 12.5.1996 / 12:29:37 / cg"
    "Modified: 12.5.1996 / 12:49:19 / cg"
! !

!CharacterArray methodsFor:'emphasis'!

emphasis
    "return the emphasis.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ RunArray new:self size withAll:nil

    "Created: 14.5.1996 / 13:58:58 / cg"
!

emphasisAt:characterIndex
    "return the emphasis at some index.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ nil

    "Created: 11.5.1996 / 14:13:27 / cg"
!

emphasisCollection
    "return the emphasis.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ RunArray new:(self size)

    "Created: 14.5.1996 / 13:58:58 / cg"
    "Modified: 14.5.1996 / 15:02:29 / cg"
!

emphasizeAllWith:emphasis
    ^ self asText emphasizeAllWith:emphasis

    "
     Transcript showCR:('hello' emphasizeAllWith:#bold)
     Transcript showCR:('hello' emphasizeAllWith:(#color -> Color red))
     Transcript showCR:('hello' emphasizeAllWith:(#color -> Color red))
    "

    "Modified: / 17.6.1998 / 12:51:44 / cg"
!

makeSelectorBoldIn:aClass
    "the receiver represents some source code for
     a method in aClass.
     Change myself to boldify the selector.
     Not yet implemented (could easily use the syntaxHighlighter
     for this ...)"

    ^ self

    "Modified: / 13.12.1999 / 21:49:11 / cg"
    "Created: / 13.12.1999 / 21:49:24 / cg"
! !

!CharacterArray methodsFor:'encoding/decoding'!

decodeFrom:encodingSymbol
    "given the receiver encoded as described by encodingSymbol, 
     convert it into internal ST/X encoding and return a corresponding CharacterArray.
     Currently, only a few encodings are supported 
     (and those are untested/incomplete):
        #euc
        #jis7
        #sjis
        #mac
        #msdos
    "

    |newString table|

    encodingSymbol == #'x-jis7' ifTrue:[
        ^ JISEncodedString decodeFromJIS7:self
    ].
    encodingSymbol == #'x-iso2022-jp' ifTrue:[
        ^ JISEncodedString decodeFromJIS7:self
    ].
    encodingSymbol == #'iso2022-jp' ifTrue:[
        ^ JISEncodedString decodeFromJIS7:self
    ].
    encodingSymbol == #'x-sjis' ifTrue:[
        ^ JISEncodedString decodeFromSJIS:self
    ].
    encodingSymbol == #'x-shift-jis' ifTrue:[
        ^ JISEncodedString decodeFromSJIS:self
    ].
    encodingSymbol == #'x-euc-jp' ifTrue:[
        ^ JISEncodedString decodeFromEUC:self
    ].
    encodingSymbol == #big5 ifTrue:[
        ^ BIG5EncodedString decodeFromBIG5:self
    ].
    encodingSymbol == #gb2312 ifTrue:[
        ^ GBEncodedString decodeFromGB:self
    ].
    encodingSymbol == #'x-euc-gb' ifTrue:[
        ^ GBEncodedString decodeFromEUC:self
    ].
    encodingSymbol == #'x-euc-kr' ifTrue:[
        ^ KSCEncodedString decodeFromEUC:self
    ].

    encodingSymbol == #ucs2 ifTrue:[
        ^ UnicodeString decodeFromUCS2:self
    ].
    encodingSymbol == #ucs4 ifTrue:[
        ^ UnicodeString decodeFromUCS4:self
    ].
    encodingSymbol == #utf7 ifTrue:[
        ^ UnicodeString decodeFromUTF7:self
    ].
    encodingSymbol == #utf8 ifTrue:[
        ^ UnicodeString decodeFromUTF8:self
    ].
    encodingSymbol == #utf16 ifTrue:[
        ^ UnicodeString decodeFromUTF16:self
    ].


    "/ soon obsolete ...
    encodingSymbol == #jis7 ifTrue:[
        ^ JISEncodedString decodeFromJIS7:self
    ].
    encodingSymbol == #sjis ifTrue:[
        ^ JISEncodedString decodeFromSJIS:self
    ].
    encodingSymbol == #euc ifTrue:[
        ^ JISEncodedString decodeFromEUC:self
    ].

    encodingSymbol == #gb ifTrue:[
        ^ GBEncodedString decodeFromGB:self
    ].
    encodingSymbol == #ksc ifTrue:[
        ^ KSCEncodedString decodeFromKSC:self
    ].

    table := self class decoderTableFor:encodingSymbol.
    table isNil ifTrue:[^ self].

    newString := self class new:self size.
    newString replaceFrom:self translateBy:table.
    newString = self ifTrue:[^ self].
    ^ newString

    "Created: 22.2.1996 / 15:06:49 / cg"
    "Modified: 16.7.1997 / 13:45:33 / cg"
!

encodeForDisplayInto:encodingSymbol
    "given the receiver in internal ST/X encoding, 
     convert it into the format as described by encodingSymbol
     for display.
     The receiver may contain roman characters in 0..255, which
     are translated according to the encodings roman translation.
     For example: in JIS, roman $a is found at 16r2361.
    "

    |newString table|

    encodingSymbol isNil ifTrue:[^ self].

    "/ notice: this method may not be moved to
    "/ concrete TwoByte classes; it is also invoked
    "/ to convert singleByte strings ...

    (encodingSymbol startsWith:'jis') ifTrue:[
        newString := JISEncodedString encodeRomans:self.

        "/ jisx0208.1983 has no half-width katakana
        (encodingSymbol startsWith:'jisx0208.1983') ifTrue:[
            newString := JISEncodedString encodeHalfWidthKatakana:newString.
        ].
        ^ newString
    ].

    (encodingSymbol startsWith:'big5') ifTrue:[
        ^ BIG5EncodedString encodeRomans:self
    ].

    (encodingSymbol startsWith:'gb') ifTrue:[
        ^ GBEncodedString encodeRomans:self
    ].

    (encodingSymbol startsWith:'ksc') ifTrue:[   
        ^ KSCEncodedString encodeRomans:self
    ].

    (encodingSymbol startsWith:'uni') ifTrue:[
        ^ UnicodeString encodeRomans:self
    ].

    table := self class encoderTableFor:encodingSymbol.
    table isNil ifTrue:[^ self].

    newString := self class new:self size.
    newString replaceFrom:self translateBy:table.
    newString = self ifTrue:[^ self].
    ^ newString

    "Modified: 2.7.1997 / 15:02:56 / cg"
!

encodeInto:encodingSymbol
    "given the receiver in internal ST/X encoding, 
     convert it into the format as described by encodingSymbol
     and return a corresponding CharacterArray.
     Currently, only a few encodings are supported 
     (and those are untested/incomplete):
        #x-euc-jp
        #x-jis7 (== iso2022-jp)
        #x-sjis
        #x-shift-jis
        #big5
        #gb
        #mac
        #msdos
        #next
    "

    |newString table|

"/  self encoding storeString print. ' -> ' print. encodingSymbol storeString printNL.

    encodingSymbol isNil ifTrue:[^ self].

    encodingSymbol == #'x-jis7' ifTrue:[
        ^ JISEncodedString encodeIntoJIS7:self
    ].
    encodingSymbol == #'x-iso2022-jp' ifTrue:[
        ^ JISEncodedString encodeIntoJIS7:self
    ].
    encodingSymbol == #'iso2022-jp' ifTrue:[
        ^ JISEncodedString encodeIntoJIS7:self
    ].
    encodingSymbol == #'x-sjis' ifTrue:[
        ^ JISEncodedString encodeIntoSJIS:self
    ].
    encodingSymbol == #'x-shift-jis' ifTrue:[
        ^ JISEncodedString encodeIntoSJIS:self
    ].
    encodingSymbol == #'x-euc-jp' ifTrue:[
        ^ JISEncodedString encodeIntoEUC:self
    ].

    encodingSymbol == #big5 ifTrue:[
        ^ BIG5EncodedString encodeIntoBIG5:self
    ].

    encodingSymbol == #gb2312 ifTrue:[
        ^ GBEncodedString encodeIntoGB:self
    ].
    encodingSymbol == #'x-euc-gb' ifTrue:[
        ^ GBEncodedString encodeIntoGB:self
    ].

    encodingSymbol == #'x-euc-kr' ifTrue:[
        ^ KSCEncodedString encodeIntoKSC:self
    ].

    encodingSymbol == #ucs2 ifTrue:[
        ^ UnicodeString encodeIntoUCS2:self
    ].
    encodingSymbol == #ucs4 ifTrue:[
        ^ UnicodeString encodeIntoUCS4:self
    ].
    encodingSymbol == #utf7 ifTrue:[
        ^ UnicodeString encodeIntoUTF7:self
    ].
    encodingSymbol == #utf8 ifTrue:[
        ^ UnicodeString encodeIntoUTF8:self
    ].
    encodingSymbol == #utf16 ifTrue:[
        ^ UnicodeString encodeIntoUTF16:self
    ].


    "/ to be obsoleted ...
    encodingSymbol == #jis7 ifTrue:[
        ^ JISEncodedString encodeIntoJIS7:self
    ].
    encodingSymbol == #sjis ifTrue:[
        ^ JISEncodedString encodeIntoSJIS:self
    ].
    encodingSymbol == #euc ifTrue:[
        ^ JISEncodedString encodeIntoEUC:self
    ].

    encodingSymbol == #gb ifTrue:[
        ^ GBEncodedString encodeIntoGB:self
    ].
    encodingSymbol == #ksc ifTrue:[
        ^ KSCEncodedString encodeIntoKSC:self
    ].

    table := self class encoderTableFor:encodingSymbol.
    table isNil ifTrue:[^ self].

    newString := self class new:self size.
    newString replaceFrom:self translateBy:table.
    newString = self ifTrue:[^ self].
    ^ newString

    "Created: 22.2.1996 / 15:07:31 / cg"
    "Modified: 16.7.1997 / 13:56:21 / cg"
!

replaceFrom:aString decode:encoding 
    "this is an experimental interface - unfinished"

    ^ self replaceFrom:aString translateBy:(self class decoderTableFor:encoding)

    "Created: 20.10.1995 / 23:00:09 / cg"
    "Modified: 22.2.1996 / 16:08:26 / cg"
!

replaceFrom:aString encode:encoding 
    "this is an experimental interface - unfinished"

    ^ self replaceFrom:aString translateBy:(self class encoderTableFor:encoding)

    "Modified: 20.10.1995 / 23:08:16 / cg"
    "Created: 22.2.1996 / 16:08:34 / cg"
!

replaceFrom:aString translateBy:encodingTable 
    "replace the receivers characters by translations coming from
     an encoding table."

    |mySize  "{ Class: SmallInteger }" 
     maxCode "{ Class: SmallInteger }"
     char 
     oldCode "{ Class: SmallInteger }"
     newCode|

    self replaceFrom:1 with:aString.
    encodingTable isNil ifTrue:[^ self].

    maxCode := encodingTable size.
    mySize := self size.

    1 to:mySize do:[:index |
        char := aString at:index.
        oldCode := char asciiValue.
        oldCode >= maxCode ifFalse:[
            newCode := encodingTable at:(oldCode + 1).
            newCode ~~ oldCode ifTrue:[
                self at:index put:(Character value:newCode)
            ]
        ]
    ].

    "Created: 22.2.1996 / 16:07:26 / cg"
    "Modified: 1.7.1997 / 17:18:06 / cg"
! !

!CharacterArray methodsFor:'padded copying'!

centerPaddedTo:newSize
     "return a new string consisting of the receivers characters,
     plus spaces up to length and center the receivers characters in
     the resulting string.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged."

     ^ self centerPaddedTo:newSize with:(Character space)

    "
     'foo' centerPaddedTo:10            
     123 printString centerPaddedTo:10 
    "

    "Created: 25.11.1995 / 10:53:57 / cg"
!

centerPaddedTo:size with:padCharacter
    "return a new string of length size, which contains the receiver
     centered (i.e. padded on both sides).
     Characters are filled with padCharacter.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged."

    |len s|

    len := self size.
    (len < size) ifTrue:[
	s := self species new:size withAll:padCharacter.
	s replaceFrom:(size - len) // 2  + 1 with:self.
	^ s
    ]

    "
     'foo' centerPaddedTo:11 with:$.     
     'fooBar' centerPaddedTo:5 with:$.      
     123 printString centerPaddedTo:10 with:$.        
     (' ' , 123 printString) centerPaddedTo:10 with:$.        
     (Float pi printString) centerPaddedTo:15 with:(Character space)  
     (Float pi printString) centerPaddedTo:15 with:$-           
     (' ' , Float pi class name) centerPaddedTo:15 with:$.     
    "
!

decimalPaddedTo:size and:afterPeriod at:decimalCharacter
    "return a new string of overall length size, which contains the receiver
     aligned at the decimal-period column and afterPeriod characters to the right
     of the period. The periodCharacter is passed as arguments (allowing for US and European formats
     to be padded).
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged.
     (sounds complicated ? -> see examples below)."

    ^ self 
	decimalPaddedTo:size 
	and:afterPeriod 
	at:decimalCharacter
	withLeft:(Character space)
	right:$0

    "                                                    
     '123' decimalPaddedTo:10 and:3 at:$.      -> '   123    '  
     '123' decimalPaddedTo:10 and:3 at:$.      -> '   123.000'  
     '123.' decimalPaddedTo:10 and:3 at:$.     -> '   123.000' 
     '123.1' decimalPaddedTo:10 and:3 at:$.    -> '   123.100'
     '123.1' decimalPaddedTo:10 and:3 at:$.    -> '   123.1  ' 
     '123.123' decimalPaddedTo:10 and:3 at:$.  -> '   123.123'
    "

    "Created: 23.12.1995 / 13:11:52 / cg"
!

decimalPaddedTo:size and:afterPeriod at:decimalCharacter withLeft:leftPadChar right:rightPadChar
    "return a new string of overall length size, which contains the receiver
     aligned at the decimal-period column and afterPeriod characters to the right
     of the period.
     Characters on the left are filled with leftPadChar.
     If rightPadChar is nil, characters on the right are filled with leftPadCharacter too;
     otherwise, if missing, a decimal point is added and right characters filled with this.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged.
     (sounds complicated ? -> see examples below)."

    |s idx n|

    idx := self indexOf:decimalCharacter.
    idx == 0 ifTrue:[
	"/
	"/ no decimal point found; adjust string to the left of the period column
	"/
	rightPadChar isNil ifTrue:[
	    s := self , (self species new:afterPeriod + 1 withAll:leftPadChar)
	] ifFalse:[
	    s:= self , decimalCharacter asString , (self species new:afterPeriod withAll:rightPadChar).
	].
    ] ifFalse:[

	"/ the number of after-decimalPoint characters
	n := self size - idx.
	rightPadChar isNil ifTrue:[
	    s := self , (self species new:afterPeriod - n withAll:leftPadChar).
	] ifFalse:[
	    s := self , (self species new:afterPeriod - n withAll:rightPadChar).
	].
    ].

    ^ s leftPaddedTo:size with:leftPadChar

    "                                                    
     '123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:nil     -> '   123    '  
     '123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0      -> '   123.000'  
     '123.' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0     -> '   123.000' 
     '123.1' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0    -> '   123.100'
     '123.1' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:nil   -> '   123.1  ' 
     '123.123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0  -> '   123.123'
    "

    "Modified: 23.12.1995 / 13:08:18 / cg"
!

leftPaddedTo:size
    "return a new string of length size, which contains the receiver
     right-adjusted (i.e. padded on the left).
     Characters on the left are filled with spaces.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged."

    ^ self leftPaddedTo:size with:(Character space)

    "
     'foo' leftPaddedTo:10  
     'fooBar' leftPaddedTo:5      
     123 printString leftPaddedTo:10        
    "
!

paddedTo:newSize
     "return a new string consisting of the receivers characters,
     plus spaces up to length.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged."

     ^ self paddedTo:newSize with:(Character space)

    "
     'foo' paddedTo:10            
     123 printString paddedTo:10 
    "
! !

!CharacterArray methodsFor:'pattern matching'!

compoundMatch:aString
    "like match, but the receiver may be a compound match pattern,
     consisting of multiple simple patterns, separated by semicolons.
     This is usable with fileName pattern fields."

    ^self compoundMatch:aString ignoreCase:false

    "
     'f*' match:'foo'       
     'b*' match:'foo'       
     'f*;b*' match:'foo'    
     'f*;b*' match:'bar'    
     'f*;b*' compoundMatch:'foo' 
     'f*;b*' compoundMatch:'bar' 
    "

    "Modified: / 30.1.1998 / 11:40:18 / stefan"
    "Modified: / 16.12.1999 / 01:22:08 / cg"
!

compoundMatch:aString ignoreCase:ignoreCase
    "like match, but the receiver may be a compound match pattern,
     consisting of multiple simple patterns, separated by semicolons.
     This is usable with fileName pattern fields."

    |matchers|

    matchers := self asCollectionOfSubstringsSeparatedBy:$;.
    matchers do:[:aPattern |
        (aPattern match:aString ignoreCase:ignoreCase) ifTrue:[^ true].
    ].
    ^ false.

    "
     'f*' match:'foo'       
     'b*' match:'foo'       
     'f*;b*' match:'foo'    
     'f*;b*' match:'bar'    
     'f*;b*' compoundMatch:'foo' 
     'f*;b*' compoundMatch:'bar' 
     'f*;b*' compoundMatch:'Foo' ignoreCase:true 
    "

    "Modified: / 15.4.1997 / 15:50:33 / cg"
    "Modified: / 30.1.1998 / 11:40:18 / stefan"
    "Created: / 16.12.1999 / 01:21:35 / cg"
!

findMatchString:matchString
    "like findString/indexOfSubCollection, but allowing match patterns.
     find matchstring; if found, return the index;
     if not found, return 0."

    ^ self findMatchString:matchString startingAt:1 ignoreCase:false ifAbsent:0 
!

findMatchString:matchString startingAt:index
    "like findString, but allowing match patterns.
     find matchstring, starting at index. if found, return the index;
     if not found, return 0."

    ^ self findMatchString:matchString startingAt:index ignoreCase:false ifAbsent:0 
!

findMatchString:matchString startingAt:index ignoreCase:ignoreCase ifAbsent:exceptionBlock
    "like findString, but allowing match patterns.
     find matchstring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     This is a q&d hack - not very efficient"

    |firstChar firstSet
     startIndex "{ Class: SmallInteger }"
     matchSize  "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     realMatchString|

    matchSize := matchString size.
    matchSize == 0 ifTrue:[^ index]. "empty string matches"

    realMatchString := matchString.
    (realMatchString endsWith:$*) ifFalse:[
        realMatchString := realMatchString , '*'.
        matchSize := matchSize + 1
    ].

    mySize := self size.
    firstChar := realMatchString at:1.
    firstChar == $\ ifTrue:[
        firstChar := realMatchString at:2.
    ].

    firstChar asString includesMatchCharacters ifTrue:[
        index to:mySize do:[:col |
            (realMatchString match:self from:col to:mySize ignoreCase:ignoreCase)
            ifTrue:[^ col]
        ].
        ^ exceptionBlock value.
    ].
    ignoreCase ifTrue:[
        firstSet := String with:(firstChar asUppercase) with:(firstChar asLowercase).
        startIndex := self indexOfAny:firstSet startingAt:index.
    ] ifFalse:[
        startIndex := self indexOf:firstChar startingAt:index.
    ].
    [startIndex == 0] whileFalse:[
        (realMatchString match:self from:startIndex to:mySize ignoreCase:ignoreCase)
        ifTrue:[^ startIndex].
        ignoreCase ifTrue:[
            startIndex := self indexOfAny:firstSet startingAt:(startIndex + 1).
        ] ifFalse:[
            startIndex := self indexOf:firstChar startingAt:(startIndex + 1).
        ].
    ].
    ^ exceptionBlock value

    "
     'one two three four' findMatchString:'o[nu]'
     'one two three four' findMatchString:'o[nu]' startingAt:3
     'one two three four one' findMatchString:'ONE' startingAt:3 ignoreCase:true ifAbsent:0 
    "

    "Modified: 13.9.1997 / 06:31:22 / cg"
!

includesMatchString:matchString
    "like includesString, but allowing match patterns.
     find matchstring; if found, return true, otherwise return false"

    ^ (self findMatchString:matchString) ~~ 0

    "
     'hello world' includesMatchString:'h*'
     'hello world' includesMatchString:'h[aeiou]llo' 
     'hello world' includesMatchString:'wor*'     
     'hello world' includesMatchString:'woR*'     
    "
!

match:aString
    "return true if aString matches self, where self may contain meta-match 
     characters $* (to match any string) or $# (to match any character).
     or [...] to match a set of characters.
     Lower/uppercase are considered different.
     NOTICE: match-meta character interpretation is like in unix-matching, 
             NOT the ST-80 meaning."

    ^ self match:aString from:1 to:aString size ignoreCase:false

    "
     '\*f*' match:'f' 
     '\*f*' match:'*f' 
     '*\*f*' match:'*f' 
     '*f*' match:'*f' 
     '*ute*' match:'computer' 
     '*uter' match:'computer' 
     'uter*' match:'computer' 
     '*ute*' match:'' 
     '[abcd]*' match:'computer' 
     '[abcd]*' match:'komputer' 
     '*some*compl*ern*' match:'this is some more complicated pattern match' 
     '*some*compl*ern*' match:'this is another complicated pattern match' 
     '*-hh' match:'anton-h'
    "

    "Modified: / 9.6.1998 / 18:50:00 / cg"
!

match:aString from:start to:stop ignoreCase:ignoreCase
    "return true if part of aString matches myself, 
     where self may contain meta-match 
     characters $* (to match any string) or $# (to match any character)
     or [...] to match a set of characters.
     If ignoreCase is true, lower/uppercase are considered the same.
     NOTICE: match-meta character interpretation is like in unix-matching, 
             NOT the ST-80 meaning."

    |matchScanArray|

    "
     keep the matchScanArray from the most recent match -
     avoids parsing the pattern over-and over if multiple searches
     are done with the same pattern.
    "
    (PreviousMatch notNil
    and:[PreviousMatch key = self]) ifTrue:[
        matchScanArray := PreviousMatch value
    ] ifFalse:[
        matchScanArray := self class matchScanArrayFrom:self.
        matchScanArray isNil ifTrue:[
            'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
            ^ self = aString    
"/            ^ false
        ].
        PreviousMatch := self -> matchScanArray.
    ].

    ^ self class
        matchScan:matchScanArray 
        from:1 to:matchScanArray size
        with:aString 
        from:start to:stop 
        ignoreCase:ignoreCase

    "
     '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true 
     '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true  
    "

    "Modified: / 10.11.1998 / 21:43:46 / cg"
!

match:aString ignoreCase:ignoreCase
    "return true if aString matches self, where self may contain meta-match 
     characters $* (to match any string) or $# (to match any character)
     or [...] to match a set of characters.
     If ignoreCase is true, lower/uppercase are considered the same.
     NOTICE: match-meta character interpretation is like in unix-matching, 
             NOT the ST-80 meaning."

    ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase

    "
     '*ute*' match:'COMPUTER' ignoreCase:true  
     '*uter' match:'COMPUTER' ignoreCase:false 
     '[abcd]*' match:'computer' ignoreCase:false 
     '[abcd]*' match:'Computer' ignoreCase:false 
     '[a-k]*' match:'komputer' ignoreCase:false   
     '[a-k]*' match:'zomputer' ignoreCase:false    
     '[a-k]*' match:'Komputer' ignoreCase:false    
     '[a-k]*' match:'Komputer' ignoreCase:true     
     '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true 
     '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true 

     Time millisecondsToRun:[
        Symbol allInstancesDo:[:sym |
            '[ab]*' match:sym ignoreCase:false
        ]
     ]. 
     Time millisecondsToRun:[
        Symbol allInstancesDo:[:sym |
            '*at:*' match:sym ignoreCase:false
        ]
     ]. 
    "

    "Modified: 2.4.1997 / 17:28:58 / cg"
! !

!CharacterArray methodsFor:'printing & storing'!

article
    "return an article string for the receiver."

    |firstChar|

    firstChar := (self at:1) asLowercase. 
    (firstChar isVowel or:[firstChar == $x]) ifTrue:[
	firstChar ~~ $u ifTrue:[
	     ^ 'an'
	]
    ].
    ^ 'a'
!

displayString
    "return a string to display the receiver - use storeString to have
     quotes around."

    ^ self storeString
!

printOn:aStream
    "print the receiver on aStream"

    aStream nextPutAll:self
!

printString
    "return a string for printing - thats myself"

    ^ self
!

printWithQuotesDoubledOn:aStream
    "put the raw storeString of myself on aStream"

    self do:[:thisChar |
        (thisChar == $') ifTrue:[aStream nextPut:thisChar].
        aStream nextPut:thisChar
    ]

    "Modified: / 15.6.1998 / 17:21:17 / cg"
    "Created: / 15.6.1998 / 17:22:13 / cg"
! !

!CharacterArray methodsFor:'queries'!

bitsPerCharacter
    "return the underlying strings bitsPerCharacter 
     (i.e. is it a regular String or a TwoByteString)"

    ^ self string bitsPerCharacter

    "
     'hello' bitsPerCharacter        
     'hello' asText allBold bitsPerCharacter 
    "
!

encoding
    "return the strings encoding, as a symbol.
     Here, by default, we assume #iso8859 encoding.
     This has to be redefined in XXXEncodedString subclasses."

    ^ #iso8859

    "Modified: 27.4.1996 / 13:23:49 / cg"
!

hasChangeOfEmphasis
    ^ false

    "Created: 12.5.1996 / 12:31:39 / cg"
!

heightOn:aGC
    "return the size of the recevier in device units if displayed on aGC"

    ^ (aGC font on:aGC device) heightOf:self

    "
     'hello world' heightOn:(View new)                 
    "

    "Created: 12.5.1996 / 20:09:29 / cg"
    "Modified: 12.5.1996 / 20:32:05 / cg"
!

isString
    "return true, if the receiver is some kind of string;
     true is returned here - redefinition of Object>>isString."

    ^ true
!

leftIndent
    "if the receiver starts with spaces, return the number of spaces
     at the left - otherwise, return 0.
     If the receiver consists of spaces only, return the receivers size."

    |index "{Class: SmallInteger }"
     end   "{Class: SmallInteger }"|

    index := 1.
    end := self size.
    [index <= end] whileTrue:[
        (self at:index) isSeparator ifFalse:[^ index - 1].
        index := index + 1
    ].
    ^ end

    "
     '    hello' leftIndent 
     'foo      ' leftIndent 
     '         ' leftIndent 
    "

    "Modified: 20.4.1996 / 19:28:43 / cg"
!

widthOn:aGC
    "return ths size of the recevier in device units if displayed on aGC"

    ^ (aGC font on:aGC device) widthOf:self

    "
     'hello world' widthOn:(View new)                 
    "

    "Created: 12.5.1996 / 20:09:29 / cg"
    "Modified: 17.4.1997 / 12:50:23 / cg"
! !


!CharacterArray methodsFor:'special string converting'!

chopTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     in the middle have been removed for a total string length
     of maxLen."

    |sz n1 n2|

    (sz := self size) > maxLen ifTrue:[
	n1 := n2 := maxLen // 2.
	maxLen odd ifTrue:[
	    n2 := n1 + 1
	].
	^ (self copyFrom:1 to:n1) , (self copyFrom:sz - n2 + 1)
    ]

    "
     '12345678901234'   chopTo:15            
     '123456789012345'  chopTo:15         
     '1234567890123456' chopTo:15      
     'aShortString' chopTo:15 
     'aVeryLongNameForAStringThatShouldBeShortened' chopTo:15 
    "
!

contractAtBeginningTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     at the beginning have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ '...' , (self copyFrom:(sz - (maxLen - 4))) 
    ]

    "
     '12345678901234' contractAtBeginningTo:15          
     '123456789012345' contractAtBeginningTo:15          
     '1234567890123456' contractAtBeginningTo:15          
     'aShortString' contractAtBeginningTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtBeginningTo:15
    "
!

contractAtEndTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     at the end have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ self copyReplaceFrom:maxLen - 3
			    with:'...'
    ]

    "
     '12345678901234' contractAtEndTo:15          
     '123456789012345' contractAtEndTo:15          
     '1234567890123456' contractAtEndTo:15          
     'aShortString' contractAtEndTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtEndTo:15 
    "
!

contractTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     in the middle have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz "{ SmallInteger }"
     halfSize "{ SmallInteger }"|

    (sz := self size) > maxLen ifTrue:[
	halfSize := maxLen // 2.
	^ self copyReplaceFrom:halfSize - 1
			    to:sz - maxLen + halfSize + 1
			    with:'...'
    ]

    "
     '12345678901234' contractTo:15          
     '123456789012345' contractTo:15          
     '1234567890123456' contractTo:15        
     'aShortString' contractTo:15 
     'aVeryLongNameForAStringThatShouldBeShortened' contractTo:15 
    "
!

expandPlaceholdersWith:argArrayOrDictionary
    "return a copy of the receiver, where all %i escapes are
     replaced by corresponding arguments' printStrings from the argArrayOrDictionary.
     I.e. 'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this') results
     in the new string 'hello world; how is this'.
     As an extension, the argument may also be a dictionary, providing
     values for character keys.
     In this case, %a .. %z and %(...) are also allowed.
     (%1..%9 require a numeric key in the dictionary, however)
     To get a '%' character, use a '%%'-escape.
     See also bindWith:... for VisualAge compatibility."

    |expandedString next v key
     idx   "{ SmallInteger }"
     idx2  "{ SmallInteger }"
     start "{ SmallInteger }"
     stop  "{ SmallInteger }"|

    expandedString := self species new:0.
    stop := self size.
    start := 1.
    [start <= stop] whileTrue:[
        idx := self indexOf:$% startingAt:start.
        (idx == 0 or:[idx == stop]) ifTrue:[
            ^ expandedString , (self copyFrom:start to:stop)
        ].
        "found a %"
        expandedString := expandedString , (self copyFrom:start to:(idx - 1)).
        next := self at:(idx + 1).
        (next == $%) ifTrue:[
            expandedString := expandedString , '%'
        ] ifFalse:[
            (next between:$1 and:$9) ifTrue:[
                v := argArrayOrDictionary at:(next digitValue)
            ] ifFalse:[
                next == $( ifTrue:[
                    idx2 := self indexOf:$) startingAt:idx+2.
                    key := self copyFrom:idx+2 to:idx2-1.
                    idx := idx2 - 1.
                    v := argArrayOrDictionary at:key asSymbol
                ] ifFalse:[
                    v := argArrayOrDictionary at:next
                ]
            ].
            expandedString := expandedString , v printString
        ].
        start := idx + 2
    ].
    ^  expandedString

    "
     'hello %1' expandPlaceholdersWith:#('world') 
     'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this') 
     'hello %2; how is %1' expandPlaceholdersWith:#('world' 'this') 
     '%1 plus %2 gives %3 ' expandPlaceholdersWith:#(4 5 9) 
    "

    "
     |dict|

     dict := Dictionary new.
     dict at:1 put:'one'.
     dict at:$a put:'AAAAA'.
     'hello %1 %a' expandPlaceholdersWith:dict 
    "

    "Modified: 1.7.1997 / 00:53:24 / cg"
!

withCRs
    "return a new string consisting of receivers characters
     with all \-characters replaced by cr-characters."

    ^ self copyReplaceAll:$\ with:(Character cr)

    "
     'hello\world' withCRs
    "

    "Modified: / 18.7.1998 / 22:53:02 / cg"
!

withEscapes
    "return a new string consisting of receivers characters
     with all \X-character escapes replaced by corresponding-characters.
     (similar to the way C-language Strings are converted).
     The following escapes are supported:
        \r      return character
        \n      newline character
        \b      backspace character
        \f      formfeed character
        \t      tab character
        \e      escape character
        \\      the \ character itself
        \nnn    three digit octal number defining the characters ascii value
        \other  other

     Notice, that \' is NOT a valid escape, since the general syntax of
     string constants is not affected by this method.

     Although easily implementable, this is NOT done automatically
     by the compiler (due to a lack of a language standard for this).
     However, the compiler may detect sends ot #withEscapes to string literals
     and place a modified string constant into the binary/byte-code.
     Therefore, no runtime penalty will be payed for using these escapes.
     (not in pre 2.11 versions)
    "

    |sz      "{ SmallInteger }"
     newSize "{ SmallInteger }"
     srcIdx  "{ SmallInteger }"
     dstIdx  "{ SmallInteger }"
     val     "{ SmallInteger }"
     newString next hasEmphasis e| 

    "
     first, count the number of escapes, to allow preallocation
     of the new string ...
     (it is faster to scan the string twice than to reallocate it multiple
      times in a WriteStream)
    "
    sz := newSize := self size.
    srcIdx := 1.
    [(srcIdx := self indexOf:$\ startingAt:srcIdx) ~~ 0] whileTrue:[
        srcIdx == sz ifFalse:[
            newSize := newSize - 1.
            srcIdx := srcIdx + 1.
            next := self at:srcIdx.
            next == $0 ifTrue:[
                [srcIdx < sz and:[next isDigit]] whileTrue:[
                    newSize := newSize - 1. srcIdx := srcIdx + 1. next := self at:srcIdx.
                ]
            ].
        ].
        srcIdx := srcIdx + 1.
    ].

    newSize == sz ifTrue:[
        ^ self
    ].

    newString := self species new:newSize.

    hasEmphasis := self hasChangeOfEmphasis.

    "
     copy over, replace escapes
    "
    srcIdx := dstIdx := 1.
    [srcIdx <= sz] whileTrue:[
        next := self at:srcIdx.
        hasEmphasis ifTrue:[
            e := self emphasisAt:srcIdx
        ].
        srcIdx := srcIdx + 1.
        next == $\ ifTrue:[
            srcIdx <= sz ifTrue:[
                next := self at:srcIdx.
                srcIdx := srcIdx + 1.
                next == $r ifTrue:[
                    next := Character return
                ] ifFalse:[
                    next == $n ifTrue:[
                        next := Character nl
                    ] ifFalse:[
                        next == $b ifTrue:[
                            next := Character backspace
                        ] ifFalse:[
                            next == $f ifTrue:[
                                next := Character newPage
                            ] ifFalse:[
                                next == $t ifTrue:[
                                    next := Character tab
                                ] ifFalse:[
                                    next == $e ifTrue:[
                                        next := Character esc
                                    ] ifFalse:[
                                        next == $0 ifTrue:[
                                            val := 0.
                                            [next notNil and:[next isDigit]] whileTrue:[
                                                val := val * 8 + next digitValue.
                                                srcIdx <= sz ifTrue:[
                                                    next := self at:srcIdx.
                                                    srcIdx := srcIdx + 1.
                                                ] ifFalse:[
                                                    next := nil
                                                ]
                                            ].
                                            next := Character value:val.
                                        ]
                                    ]
                                ]
                            ]
                        ]
                    ]
                ].
            ].
        ].
        newString at:dstIdx put:next.
        hasEmphasis ifTrue:[
            newString emphasisAt:dstIdx put:e
        ].
        dstIdx := dstIdx + 1.
    ].
    ^ newString

    "
     'hello world' withEscapes  
     'hello\world' withEscapes   
     'hello\world\' withEscapes   
     'hello world\' withEscapes   
     'hello\tworld' withEscapes   
     'hello\nworld\na\n\tnice\n\t\tstring' withEscapes   
     'hello\tworld\n' withEscapes   
     'hello\010world' withEscapes   
     'hello\r\nworld' withEscapes   
    "

    "Modified: 12.5.1996 / 12:53:34 / cg"
!

withMatchEscapes
    "return a copy of the receiver with all match characters escaped
     by $\ characters (to be usable as a match string).
     Return the receiver, if there are none."

    |in out c|

    in := self readStream.
    out := '' writeStream.
    [in atEnd] whileFalse:[
        c := in next.
        ('*[#\' includes:c) ifTrue:[
            out nextPut:$\.
        ].
        out nextPut:c.
    ].
    ^ out contents.

    "
     '*foo' withMatchEscapes   
     '\*foo' withMatchEscapes  
     '*foo' withMatchEscapes   
     '\\*foo' withMatchEscapes 
     'foo*' withMatchEscapes   
     'foo\*' withMatchEscapes  
     'foo\' withMatchEscapes  
     'f*o*o' withMatchEscapes   
    "

    "Modified: 2.4.1997 / 18:13:04 / cg"
!

withTabs
    "return a string consisting of the receivers characters
     where leading spaces are replaced by tabulator characters (assuming 8-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     Limitation: only the very first spaces are replaced 
		 (i.e. if the receiver contains newLine characters,
		  no tabs are inserted after those lineBreaks)"

    |idx   "{ SmallInteger }" 
     nTabs "{ SmallInteger }" 
     newString|

    idx := self findFirst:[:c | (c ~~ Character space)].
    nTabs := (idx-1) // 8.
    nTabs == 0 ifTrue:[^ self].

    "any tabs"
    newString := self class new:(self size - (nTabs * 7)).
    newString atAll:(1 to:nTabs) put:(Character tab).
    newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
    ^ newString

    "
     '12345678901234567890' withTabs 
     '       8901234567890' withTabs 
     '        901234567890' withTabs  
     '               67890' withTabs
     '                7890' withTabs
     '                 890' withTabs
    "
!

withTabsExpanded
    "return a string consisting of the receivers characters,
     where all tabulator characters are expanded into spaces (assuming 8-col tabs). 
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings."

    |col    "{ SmallInteger }" 
     str ch
     dstIdx "{ SmallInteger }"
     newSz  "{ SmallInteger }"
     sz "{ SmallInteger }"
     hasEmphasis e|

    (self includes:(Character tab)) ifFalse:[^ self].

    sz := self size.

    "/ count the new size first, instead of
    "/ multiple resizing (better for large strings)

    col := 1. newSz := 0.
    1 to:sz do:[:srcIdx |
        ch := self at:srcIdx.
        ch == Character tab ifFalse:[
            col := col + 1.
            newSz := newSz + 1.
            ch == Character cr ifTrue:[
                col := 1
            ].
        ] ifTrue:[
            (col \\ 8) to:8 do:[:ii |
                newSz := newSz + 1.
                col := col + 1
            ].
        ]
    ].

    str := self species new:newSz.

    hasEmphasis := self hasChangeOfEmphasis.

    col := 1. dstIdx := 1.
    1 to:sz do:[:srcIdx |
        ch := self at:srcIdx.

        ch == Character tab ifFalse:[
            col := col + 1.
            ch == Character cr ifTrue:[
                col := 1
            ].
            hasEmphasis ifTrue:[
                e := self emphasisAt:srcIdx.
                str emphasisAt:dstIdx put:e
            ].
            str at:dstIdx put:ch.
            dstIdx := dstIdx + 1
        ] ifTrue:[
            (col \\ 8) to:8 do:[:ii |
                str at:dstIdx put:Character space.
                dstIdx := dstIdx + 1.
                col := col + 1
            ].
        ]
    ].
    ^ str

    "
     ('1' , Character tab asString , 'x') withTabsExpanded          
     ('12345' , Character tab asString , 'x') withTabsExpanded      
     ('123456' , Character tab asString , 'x') withTabsExpanded     
     ('1234567' , Character tab asString , 'x') withTabsExpanded   
     ('12345678' , Character tab asString , 'x') withTabsExpanded   
     ('123456789' , Character tab asString , 'x') withTabsExpanded 

     (String with:Character tab
             with:Character tab
             with:$1) withTabsExpanded

     (String with:Character tab
             with:$1
             with:Character tab
             with:$2) withTabsExpanded  

     (String with:Character tab
             with:$1
             with:Character cr
             with:Character tab
             with:$2) withTabsExpanded  
    "

    "Modified: 12.5.1996 / 13:05:10 / cg"
!

withoutCRs
    "return a new collection consisting of receivers elements
     with all cr-characters replaced by \-characters.
     This is the reverse operation of withCRs."

    ^ self copyReplaceAll:(Character cr) with:$\
    "
     'hello
world' withoutCRs
    "

    "Modified: / 18.7.1998 / 22:53:08 / cg"
!

withoutLeadingSeparators
    "return a copy of myself without leading separators.
     Notice: this does remove tabs, newline or any other whitespace.
     Returns an empty string, if the receiver consist only of whitespace."

    |index|

    index := self indexOfNonSeparatorStartingAt:1.
    index ~~ 0 ifTrue:[
	index == 1 ifTrue:[
	    ^ self
	].
	^ self copyFrom:index
    ].
    ^ ''

    "
     '    foo    ' withoutLeadingSeparators  
     'foo    '     withoutLeadingSeparators   
     '    foo'     withoutLeadingSeparators  
     '       '     withoutLeadingSeparators   
     'foo'         withoutLeadingSeparators   
     ('  ' , Character tab asString , ' foo   ') withoutLeadingSeparators inspect 
    "
!

withoutMatchEscapes
    "return a copy of the receiver with all $\ removed or
     the receiver, if there are none."

    |in out c|

    in := self readStream.
    out := self species new writeStream.
    [in atEnd] whileFalse:[
        c := in next.
        c == $\ ifTrue:[
            in atEnd ifFalse:[
                c := in next.
            ]
        ].
        out nextPut:c.
    ].
    ^ out contents.

    "
     '*foo' withoutMatchEscapes   
     '\*foo' withoutMatchEscapes  
     '*foo' withoutMatchEscapes   
     '\\*foo' withoutMatchEscapes 
     'foo*' withoutMatchEscapes   
     'foo\*' withoutMatchEscapes  
     'foo\' withoutMatchEscapes  
     'f\*o\*o' withoutMatchEscapes   
    "

    "Modified: 30.6.1997 / 13:40:23 / cg"
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     (but whiteSpace in-between is preserved)
     Whitespace is space, tab, newline, formfeed.
     Use withoutSpaces, if you want to remove spaces only."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
        startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
        endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
        ^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
        ^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSeparators      
     '    foo' withoutSeparators      
     'foo    ' withoutSeparators      
     '       ' withoutSeparators      
     ('  foo' , Character tab asString , '    ') withoutSeparators inspect 
    "
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     (but spaces in-between are preserved)
     Notice: this does NOT remove tabs, newline or any other whitespace.
     Use withoutSeparators for this."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
        startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) == Character space]] whileTrue:[
        endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
        ^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
        ^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSpaces  
     'foo    '     withoutSpaces   
     '    foo'     withoutSpaces  
     '       '     withoutSpaces   
     'a     b'     withoutSpaces   
     ('  foo' , Character tab asString , '    ') withoutSpaces inspect 
    "
!

withoutTrailingSeparators
    "return a copy of myself without trailing separators.
     Notice: this does remove tabs, newline or any other whitespace.
     Returns an empty string, if the receiver consist only of whitespace."

    |index|

    index := self size.
    [index ~~ 0] whileTrue:[
	(self at:index) isSeparator ifFalse:[
	    ^ self copyTo:index
	].
	index := index - 1
    ].
    ^ ''

    "
     '    foo    ' withoutTrailingSeparators  
     'foo    '     withoutTrailingSeparators   
     '    foo'     withoutTrailingSeparators  
     '       '     withoutTrailingSeparators   
     'foo'         withoutTrailingSeparators   
     ('  ' , Character tab asString , ' foo   ') withoutTrailingSeparators inspect 
     ('   foo' , Character tab asString) withoutTrailingSeparators inspect 
    "
! !

!CharacterArray methodsFor:'substring searching'!

findString:subString
    "find a substring. if found, return the index;
     if not found, return 0."

    ^ self indexOfSubCollection:subString startingAt:1 ifAbsent:0

    "
     'hello world' findString:'llo'   
     'hello world' findString:'ole'  
    "
!

findString:subString ifAbsent:exceptionBlock
    "find a substring. If found, return the index;
     if not found, return the result of evaluating exceptionBlock."

    ^ self indexOfSubCollection:subString startingAt:1 ifAbsent:exceptionBlock
!

findString:subString startingAt:index
    "find a substring, starting at index. if found, return the index;
     if not found, return 0."

    ^ self indexOfSubCollection:subString startingAt:index ifAbsent:0

    "
     'hello yello' findString:'llo' startingAt:1   
     'hello yello' findString:'llo' startingAt:5   
     'hello yello' findString:'llo' startingAt:15   
    "
!

findString:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock."

    ^ self indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock
!

includesString:aString
    "return true, if a substring is contained in the receiver"

    ^ (self indexOfSubCollection:aString startingAt:1 ifAbsent:0) ~~ 0

    "
     'hello world' includesString:'hel' 
     'hello world' includesString:'rld' 
     'hello world' includesString:'llo'  
     'hello world' includesString:'LLO'   
    "
!

indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     This is a q&d hack - not very efficient"

    |firstChar found
     startIndex "{ Class: SmallInteger }"
     subSize    "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     runIdx     "{ Class: SmallInteger }" |

    subSize := subString size.
    subSize == 0 ifTrue:[^ index]. "empty string matches"
    mySize := self size.
    firstChar := subString at:1.
    startIndex := self indexOf:firstChar startingAt:index.
    [startIndex == 0] whileFalse:[
        runIdx := startIndex.
        found := true.
        1 to:subSize do:[:i |
            runIdx > mySize ifTrue:[
                found := false
            ] ifFalse:[
                (subString at:i) ~= (self at:runIdx) ifTrue:[
                    found := false
                ]
            ].
            runIdx := runIdx + 1
        ].
        found ifTrue:[
            ^ startIndex
        ].
        startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
    ].
    ^ exceptionBlock value

    "Modified: 23.2.1996 / 15:35:15 / cg"
!

restAfter:keyword withoutSeparators:strip
    "compare the left of the receiver with keyword,
     if it matches return the right. 
     Finally, if strip is true, remove whiteSpace.
     This method is used to match and extract lines of the form:
	something: rest
     where we are interested in rest, but only if the receiver string
     begins with something. 

     You may wonder why such a specialized method exists here
     - this is so common when processing mailboxes,
     rcs files, nntp/pop3 responses, that is was considered worth
     a special method here to avoid having the code below a hundred
     times in variuos places."

    |rest|

    (self startsWith:keyword) ifTrue:[
	rest := self copyFrom:(keyword size + 1).
	strip ifTrue:[
	    rest := rest withoutSeparators
	].
	^ rest
    ].
    ^ nil

    "
     'foo: hello world' restAfter:'foo:' withoutSeparators:true 
     'funny: something' restAfter:'foo:' withoutSeparators:true 

     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:true 
     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:false 
    "

    "Created: 25.11.1995 / 11:04:18 / cg"
! !

!CharacterArray methodsFor:'testing'!

continuesWith:aString startingAt:startIndex
    "return true, if the receiver beginning at startIndex
     contains the characters in aString."

    |sz  "{Class: SmallInteger }"
     idx "{Class: SmallInteger }"|

    sz := aString size.
    idx := startIndex.

    1 to:sz do:[:i |
        (self at:idx) ~~ (aString at:i) ifTrue:[^ false].
        idx := idx + 1
    ].
    ^ true

    "
     'hello world' continuesWith:'world' startingAt:6                
     'hello world' continuesWith:'world' startingAt:7                
    "

    "Created: 12.5.1996 / 15:46:40 / cg"
    "Modified: 26.7.1996 / 19:08:36 / cg"
!

countWords
    "return the number of words, which are separated by separators"

    |tally "{ Class: SmallInteger }"
     start "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"
     stop ch|

    tally := 0.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	ch := self at:start.
	ch isSeparator ifTrue:[
	    start := start + 1
	] ifFalse:[
	    stop := self indexOfSeparatorStartingAt:start.
	    (stop == 0) ifTrue:[
		stop := mySize + 1
	    ].
	    tally := tally + 1.
	    start := stop
	]
    ].
    ^ tally

    "
     'hello world isnt this nice' countWords'
    "
!

endsWith:aString
    "return true, if the receiver ends with something, aString."

    |s|

    (s := self string) ~~ self ifTrue:[
        ^ s endsWith:aString
    ].
    ^ super endsWith:aString

    "
     'hello world' endsWith:'world'                 
     'hello world' asText allBold endsWith:'world'  
    "

    "Modified: 12.5.1996 / 15:49:18 / cg"
!

isAlphaNumeric
    "return true, if the receiver is some alphanumeric word;
     i.e. consists of a letter followed by letters or digits."

    self size == 0 ifTrue:[
	"mhmh what is this ?"
	^ false
    ].
    (self at:1) isLetter ifFalse:[^ false].
    self do:[:char |
	char isLetterOrDigit ifFalse:[^ false].
    ].
    ^ true

    "
     'helloWorld' isAlphaNumeric  
     'foo1234' isAlphaNumeric    
     'f1234' isAlphaNumeric      
     '1234' isAlphaNumeric       
     '+' isAlphaNumeric         
    "
!

isBlank
    "return true, if the receiver contains spaces only"

    self do:[:char |
	char ~~ Character space ifTrue:[^ false].
    ].
    ^ true
!

levenshteinTo:aString
    "return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     See IEEE transactions on Computers 1976 Pg 172 ff."

    "
     in the following, we assum that ommiting a character
     is less of an error than inserting an extra character.
     Therefore the different insertion (i) and deletion (d)
     values.
    "

    ^ self levenshteinTo:aString s:4 c:1 i:2 d:6

    "
     'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'computer' levenshteinTo:'cmputer'
     'Computer' levenshteinTo:'computer'
    "
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. 
     return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     The arguments are the costs for substitution, case-change, insertion and 
     deletion of a character.
     See IEEE transactions on Computers 1976 Pg 172 ff"

    |d  "delta matrix"
     len1 "{ Class: SmallInteger }"
     len2 "{ Class: SmallInteger }"
     dim  "{ Class: SmallInteger }"
     prevRow row col 
     dimPlus1 "{ Class: SmallInteger }"
     min pp c1 c2|

    len1 := self size.
    len2 := aString size.

    "create the help-matrix"

    dim := len1 max:len2.
    dimPlus1 := dim + 1.

    d := Array new:dimPlus1.
    1 to:dimPlus1 do:[:i |
	d at:i put:(Array new:dimPlus1)
    ].

    "init help-matrix"

    (d at:1) at:1 put:0.
    row := d at:1.
    1 to:dim do:[:j |
	row at:(j + 1) put:( (row at:j) + insrtWeight )
    ].

    1 to:dim do:[:i |
	 (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
    ].

    1 to:len1 do:[:i |
	c1 := self at:i.
	1 to:len2 do:[:j |
	    c2 := aString at:j.
	    (c1 == c2) ifTrue:[
		pp := 0
	    ] ifFalse:[
		(c1 asLowercase == c2 asLowercase) ifTrue:[
		    pp := caseWeight
		] ifFalse:[
		    pp := substWeight
		]
	    ].
	    prevRow := d at:i.
	    row := d at:(i + 1).
	    col := j + 1.
	    min := (prevRow at:j) + pp.
	    min := min min:( (row at:j) + insrtWeight).
	    min := min min:( (prevRow at:col) + deleteWeight).
	    row at:col put: min
	]
    ].

    ^ (d at:(len1 + 1)) at:(len2 + 1)
!

numArgs
    "treating the receiver as a message selector, return how many arguments would it take"

    |binopChars firstChar|

    (self size > 2) ifFalse:[
        binopChars := Scanner binarySelectorCharacters.
        firstChar := self at:1.

        (self size == 1) ifTrue:[
            ((binopChars occurrencesOf:firstChar) == 0) ifTrue:[^ 0].
            ^ 1
        ].
        ((binopChars occurrencesOf:firstChar) == 0) ifFalse:[
            ((binopChars occurrencesOf:(self at:2)) == 0) ifFalse:[^ 1]
        ]
    ].
    ^ self occurrencesOf:$:

    "
     'foo:bar:' numArgs  
     #foo:bar: numArgs    
     'hello' numArgs       
     '+' numArgs   
     '|' numArgs   
     '?' numArgs   
    "

    "Modified: 4.1.1997 / 14:16:14 / cg"
!

partsIfSelector
    "treat the receiver as a message selector, return a collection of parts."

    |idx1 "{ Class: SmallInteger }"
     coll idx2 sz|

    coll := OrderedCollection new.
    idx1 := 1.
    sz := self size.
    [true] whileTrue:[
	idx2 := self indexOf:$: startingAt:idx1 + 1.
	(idx2 == 0 or:[idx2 == sz]) ifTrue:[
	    coll add:(self copyFrom:idx1).
	    ^ coll
	].
	coll add:(self copyFrom:idx1 to:idx2).
	idx1 := idx2 + 1
    ].

    "
     'foo:bar:' partsIfSelector     
     #foo:bar: partsIfSelector     
     'hello' partsIfSelector       
     '+' partsIfSelector           
    "
!

spellAgainst: aString 
    "return an integer between 0 and 100 indicating how similar 
     the argument is to the receiver.  No case conversion is done.
     This algorithm is much simpler (but also less exact) than the
     levenshtein distance. Experiment which is better for your
     application."

    | i1     "{ Class: SmallInteger }"
      i2     "{ Class: SmallInteger }"
      next1  "{ Class: SmallInteger }"
      next2  "{ Class: SmallInteger }"
      size1  "{ Class: SmallInteger }"
      size2  "{ Class: SmallInteger }"
      score  "{ Class: SmallInteger }"
      maxLen "{ Class: SmallInteger }" |

    size1 := self size.
    size2 := aString size.
    maxLen := size1 max:size2.
    score := 0.
    i1 := i2 := 1.
    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
	next1 := i1 + 1.
	next2 := i2 + 1.
	(self at:i1) == (aString at:i2) ifTrue: [
	    score := score+1.             
	    i1 := next1.                    
	    i2 := next2
	] ifFalse: [
	    (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [
		i2 := next2
	    ] ifFalse: [
		(i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [
		    i1 := next1
		] ifFalse: [
		    i1 := next1.
		    i2 := next2
		] 
	    ] 
	] 
    ].

    score == maxLen ifTrue: [^ 100].
    ^ 100 * score // maxLen

    " 
     'Smalltalk' spellAgainst: 'Smalltlak' 
     'Smalltalk' spellAgainst: 'smalltlak' 
     'Smalltalk' spellAgainst: 'smalltalk' 
     'Smalltalk' spellAgainst: 'smalltlk'  
     'Smalltalk' spellAgainst: 'Smalltolk'   
    "
!

startsWith:aString
    "return true, if the receiver starts with something, aString.
     If the argument is empty, true is returned."

    |s|

    (s := self string) ~~ self ifTrue:[
        ^ s startsWith:aString
    ].
    ^ super startsWith:aString

    "
     'hello world' startsWith:'hello'                 
     'hello world' asText allBold startsWith:'hello'  
     'hello world' asText allBold startsWith:''  
    "

    "Created: 12.5.1996 / 15:46:40 / cg"
    "Modified: 12.5.1996 / 15:49:24 / cg"
! !

!CharacterArray methodsFor:'tracing'!

traceInto:aRequestor level:level from:referrer
    "double dispatch into tracer, passing my type implicitely in the selector"

    ^ aRequestor traceCharacterArray:self level:level from:referrer


! !

!CharacterArray class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.175 1999-12-29 11:26:18 cg Exp $'
! !
CharacterArray initialize!