CharacterArray.st
author Claus Gittinger <cg@exept.de>
Fri, 08 Jul 2005 19:15:03 +0200
changeset 8913 b9498d27a554
parent 8900 8b669296f3eb
child 8938 860bbcbd3bd8
permissions -rw-r--r--
64bit; mkSmallInteger

"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"{ Package: 'stx:libbasic' }"

ByteArray variableByteSubclass:#CharacterArray
	instanceVariableNames:''
	classVariableNames:'PreviousMatch DecoderTables EncoderTables DecodingFailedSignal
		EncodingFailedSignal'
	poolDictionaries:''
	category:'Collections-Text'
!

!CharacterArray class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    CharacterArray is a superclass for all kinds of Strings (i.e.
    (singleByte-)Strings, TwoByteStrings and whatever comes in the future.

    This class is abstract, meaning that there are no instances of it.
    All this class does is provide common protocol for concrete subclasses.

    [author:]
	Claus Gittinger

    [see also:]
	String TwoByteString
	StringCollection
"
! !

!CharacterArray class methodsFor:'initialization'!

initialize
    DecodingFailedSignal isNil ifTrue:[
	DecodingFailedSignal := DecodingError.
	DecodingFailedSignal notifierString:'error during decode'.

	EncodingFailedSignal :=EncodingError.
	EncodingFailedSignal notifierString:'error during encode'.
    ]

    "
     CharacterArray initialize
    "

    "Modified: 3.8.1997 / 18:15:59 / cg"
! !

!CharacterArray class methodsFor:'instance creation'!

basicNew
    "return a new empty string"

    ^ self basicNew:0
!

fromBytes:aByteCollection
    "return an instance of the receiver class,
     taking untranslated bytes from the argument, aByteCollection.
     Only useful, when reading twoByteStrings from external sources."

    |mySize nBytes newString dstIdx|

    nBytes := aByteCollection size.
    mySize := self basicNew bitsPerCharacter.
    mySize == 16 ifTrue:[
	newString := self basicNew:(nBytes // 2).
	dstIdx := 1.
	aByteCollection pairWiseDo:[:hi :lo |
	    newString at:dstIdx put:(Character value:(hi bitShift:8)+lo).
	    dstIdx := dstIdx + 1
	].
	^ newString.
    ].

    ^ (self basicNew:nBytes) replaceFrom:1 with:aByteCollection

    "
     TwoByteString fromBytes:#[16r21 16r21]
    "

    "Modified: 30.6.1997 / 20:08:37 / cg"
!

fromString:aString
    "return a copy of the argument, aString"

    ^ (self basicNew:(aString size)) replaceFrom:1 with:aString

    "TwoByteString fromString:'hello'"
!

fromStringCollection:aCollectionOfStrings
    "return new string formed by concatenating a copy of the argument, aString"

    ^ self fromStringCollection:aCollectionOfStrings separatedBy:''

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this')
    "

    "Created: 20.11.1995 / 15:26:59 / cg"
!

fromStringCollection:aCollectionOfStrings separatedBy:aSeparatorString
    "return new string formed by concatenating a copy of the argument, aString"

    |newString first|

    newString := ''.
    first := true.
    aCollectionOfStrings do:[:s |
	first ifFalse:[
	    newString := newString , aSeparatorString
	] ifTrue:[
	    first := false
	].
	newString := newString , s
    ].
    ^ newString

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this') separatedBy:' '
    "

    "Created: 20.11.1995 / 15:32:17 / cg"
!

fromUTF8Bytes:aByteCollection
    "return a new string which represents the characters as decoded
     from the utf8 encoded bytes, aByteCollection.
     Returns either a normal String, or a TwoByteString instance.
     Only useful, when reading twoByteStrings from external sources.
     This only handles up-to 16bit characters"

    ^ self decodeFromUTF8:aByteCollection.

    "
     CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ]
     CharacterArray fromUTF8Bytes:#[ 16rC1 16r02 ]
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r81 16r02 ]
     CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ]

   rfc2279 examples:
     CharacterArray fromUTF8Bytes:#[ 16r41 16rE2 16r89 16rA2 16rCE 16r91 16r2E ]
     CharacterArray fromUTF8Bytes:#[ 16rED 16r95 16r9C 16rEA 16rB5 16rAD 16rEC 16r96 16rB4 ]
     CharacterArray fromUTF8Bytes:#[ 16rE6 16r97 16rA5 16rE6 16r9C 16rAC 16rE8 16rAA 16r9E ]

   invalid:
     CharacterArray fromUTF8Bytes:#[ 16rC0 16r80 ]
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r80 16r80 ]
    "
!

new
    "return a new empty string"

    ^ self basicNew:0
! !

!CharacterArray class methodsFor:'Signal constants'!

decodingFailedSignal
    "return the signal, raised when decoding of a string is not possible
     due to invalid characters contained in the source.
     This may happen for example, if a non EUC coded 8-bit string
     is attempted to be decoded into a JIS string."

    ^ DecodingFailedSignal

    "Created: 28.6.1997 / 20:09:55 / cg"
    "Modified: 3.8.1997 / 18:16:47 / cg"
!

encodingFailedSignal
    "return the (query-) signal, raised when encoding of a string is not possible
     due to invalid characters contained in the source."

    ^ EncodingFailedSignal

    "Modified: 28.6.1997 / 20:09:35 / cg"
    "Created: 3.8.1997 / 18:16:40 / cg"
! !

!CharacterArray class methodsFor:'cleanup'!

lowSpaceCleanup
    "cleanup in low-memory situations"

    DecoderTables := EncoderTables := nil

    "
     CharacterArray lowSpaceCleanup
    "

    "Created: 22.2.1996 / 16:30:30 / cg"
    "Modified: 22.2.1996 / 17:58:05 / cg"
! !

!CharacterArray class methodsFor:'encoding & decoding'!

decodeFromUTF8:aStringOrByteCollection
    "return a string which represents the characters as decoded
     from the utf8 encoded bytes, aByteCollection.
     Returns either a normal String, or a TwoByteString instance.
     Only useful, when reading twoByteStrings from external sources.
     This only handles up-to 16bit characters."

    ^ CharacterEncoderImplementations::ISO10646_to_UTF8 decodeString:aStringOrByteCollection

    "
     CharacterArray fromUTF8Bytes:#[ 16r41 16r42 ]
     CharacterArray fromUTF8Bytes:#[ 16rC1 16r02 ]
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r81 16r02 ]
     CharacterArray fromUTF8Bytes:#[ 16rEF 16rBF 16rBF ]

   rfc2279 examples:
     CharacterArray fromUTF8Bytes:#[ 16r41 16rE2 16r89 16rA2 16rCE 16r91 16r2E ]
     CharacterArray fromUTF8Bytes:#[ 16rED 16r95 16r9C 16rEA 16rB5 16rAD 16rEC 16r96 16rB4 ]
     CharacterArray fromUTF8Bytes:#[ 16rE6 16r97 16rA5 16rE6 16r9C 16rAC 16rE8 16rAA 16r9E ]

   invalid:
     CharacterArray fromUTF8Bytes:#[ 16rC0 16r80 ]
     CharacterArray fromUTF8Bytes:#[ 16rE0 16r80 16r80 ]
    "
! !

!CharacterArray class methodsFor:'pattern matching'!

matchEscapeCharacter
    "return the character used to escape a matchCharacter
     (i.e. make it a regular character in a matchPattern)"

    ^ $\
!

matchScan:matchScanArray from:matchStart to:matchStop with:aString from:start to:stop ignoreCase:ignoreCase
    "helper for match; return true if the characters from start to stop in
     aString are matching the scan in matchScan from matchStart to matchStop.
     The matchScan is as created by asMatchScanArray.

     This algorithm is not at all the most efficient;
     for heavy duty pattern matching, an interface (primitive) to the regex
     pattern matching package should be added."

    |matchEntry
     mStart "{ Class: SmallInteger }"
     mStop  "{ Class: SmallInteger }"
     sStart "{ Class: SmallInteger }"
     sStop  "{ Class: SmallInteger }"
     mSize  "{ Class: SmallInteger }"
     sSize  "{ Class: SmallInteger }"
     index  "{ Class: SmallInteger }"
     quickCheck matchLast nextMatchEntry
     checkChar included|

    mStart := matchStart.
    mStop := matchStop.
    sStart := start.
    sStop := stop.

    [true] whileTrue:[
"/ Transcript showCR:('match: ''' , (aString copyFrom:sStart to:sStop) ,
"/                    ''' against:' , (matchScanArray copyFrom:mStart to:mStop) printString).

	mSize := mStop - mStart + 1.
	sSize := sStop - sStart + 1.

	"empty strings match"
	(mSize == 0) ifTrue:[^ (sSize == 0)].

	matchEntry := matchScanArray at:mStart.

	"/ the most common case first:
	(sSize ~~ 0
	and:[(checkChar := (aString at:sStart)) = matchEntry]) ifTrue:[
	    "advance by one and continue"
	    mStart := mStart + 1.
	    sStart := sStart + 1
	] ifFalse:[
	    (matchEntry == #any) ifTrue:[
		"restString empty -> no match"
		(sSize == 0) ifTrue:[^ false].
		"# matches single character"
		((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
		"advance by one and continue"
		mStart := mStart + 1.
		sStart := sStart + 1
	    ] ifFalse:[
		(matchEntry == #anyString) ifTrue:[
		    "* alone matches anything"
		    (mSize == 1) ifTrue:[^ true].
		    "restString empty & matchString not empty -> no match"
		    (sSize == 0) ifTrue:[^ false].

		    "
		     try to avoid some of the recursion by checking last
		     character and continue with shortened strings if possible
		    "
		    quickCheck := false.
		    (mStop >= mStart) ifTrue:[
			matchLast := matchScanArray at:mStop.
			(matchLast ~~ #anyString) ifTrue:[
			    (matchLast == #any) ifTrue:[
				quickCheck := true
			    ] ifFalse:[
				matchLast == (aString at:sStop) ifTrue:[
				    quickCheck := true
				] ifFalse:[
				    matchLast isString ifTrue:[
					quickCheck := matchLast includes:(aString at:sStop)
				    ]
				]
			    ]
			]
		    ].
		    quickCheck ifTrue:[
			"
			 quickCheck ok, advance from the right
			"
			mStop := mStop - 1.
			sStop := sStop - 1
		    ] ifFalse:[
			"/ no quick check;
			"/ look for the next character(s)
			"/ and try matching there
			"/ (to avoid recursion)

			mStart < mStop ifTrue:[
			    nextMatchEntry := matchScanArray at:mStart+1.
			    nextMatchEntry isCharacter ifTrue:[
				sStart <= sStop ifTrue:[
				    [true] whileTrue:[
					ignoreCase ifFalse:[
					    index := aString indexOf:nextMatchEntry startingAt:sStart
					] ifTrue:[
					    index := aString findFirst:[:c | c asLowercase = nextMatchEntry asLowercase]
							   startingAt:sStart.
					].
					(index == 0 or:[index > sStop]) ifTrue:[
					    ^ false
					].
					(self matchScan:matchScanArray
					      from:(mStart + 1)
					      to:mStop
					      with:aString
					      from:index
					      to:sStop
					      ignoreCase:ignoreCase
					) ifTrue:[
					    ^ true
					].
					sStart := index + 1.
				    ]
				]
			    ]
			].

			"
			 no quick check possible;
			 loop over all possible substrings
			"
			index := sStart.
			[index <= sStop] whileTrue:[
			    (self matchScan:matchScanArray
				  from:(mStart + 1)
				  to:mStop
				  with:aString
				  from:index
				  to:sStop
				  ignoreCase:ignoreCase
			    ) ifTrue:[
				^ true
			    ].
			    index := index + 1
			].
			^ false
		    ].
		] ifFalse:[
		    (matchEntry isString) ifTrue:[
			"testString empty -> no match"
			(sSize == 0) ifTrue:[^ false].

			included := false.
			"/ checkChar := aString at:sStart.
			included := matchEntry includes:checkChar.
			included ifFalse:[
			    ignoreCase ifTrue:[
				checkChar isUppercase ifTrue:[
				    included := matchEntry includes:checkChar asLowercase.
				] ifFalse:[
				    included := matchEntry includes:checkChar asUppercase.
				]
			    ].
			].
			mStart := mStart + 1.
			mSize := mSize - 1.
			included ifFalse:[^ false].

			((sSize == 1) and:[mSize == 0]) ifTrue:[^ true].
			"cut off 1st char and continue"
			sStart := sStart + 1
		    ] ifFalse:[
			"/ must be single character

			"testString empty ?"
			(sSize == 0) ifTrue:[^ false].

			"first characters equal ?"
			"/ checkChar := aString at:sStart.
			ignoreCase ifFalse:[^ false].
			(checkChar asUppercase ~= matchEntry asUppercase) ifTrue:[^ false].

			"advance and continue"
			mStart := mStart + 1.
			sStart := sStart + 1
		    ]
		]
	    ]
	]
    ].

    "
     |scanArray s|

     scanArray := self matchScanArrayFrom:'*hello'.
     s := 'foo bar hello world'.
     CharacterArray
	 matchScan:scanArray
	 from:1
	 to:scanArray size
	 with:s
	 from:1
	 to:s size
	 ignoreCase:false
    "
    "
     |scanArray s|

     scanArray := self matchScanArrayFrom:'*hello*'.
     s := 'foo bar hello world'.
     CharacterArray
	 matchScan:scanArray
	 from:1
	 to:scanArray size
	 with:s
	 from:1
	 to:s size
	 ignoreCase:false
    "

    "Modified: / 15.10.1998 / 13:39:25 / cg"
!

matchScanArrayFrom:aString
    "scan a pattern string and decompose it into a scanArray.
     This is processed faster (especially with character ranges), and
     can also be reused later. (if the same pattern is to be searched again)"

    |coll
     idx "{ Class: SmallInteger }"
     end c1 c2 matchSet previous escape|

    previous := nil.
    escape := self matchEscapeCharacter.

    coll := OrderedCollection new.
    idx := 1. end := aString size.
    [idx <= end] whileTrue:[
	|char this|

	char := aString at:idx.
	char == $* ifTrue:[
	    previous ~~ #anyString ifTrue:[
		this := #anyString
	    ]
	] ifFalse:[
	    char == $# ifTrue:[
		previous ~~ #anyString ifTrue:[
		    this := #any
		]
	    ] ifFalse:[
		char == $[ ifTrue:[
		    matchSet := IdentitySet new.
		    idx := idx + 1.
		    idx > end ifTrue:[^ nil].
		    char := aString at:idx.
		    c1 := nil.
		    [char ~~ $]] whileTrue:[
			((char == $-) and:[c1 notNil]) ifTrue:[
			    idx := idx + 1.
			    idx > end ifTrue:[^ nil].
			    c2 := aString at:idx.
			    c1 to:c2 do:[:c | matchSet add:c].
			    c1 := nil.
			    idx := idx + 1.
			] ifFalse:[
			    (char ~~ $]) ifTrue:[
				matchSet add:char.
				c1 := char.
				idx := idx + 1
			    ]
			].
			idx > end ifTrue:[^ nil].
			char := aString at:idx
		    ].
		    this := matchSet asString
		] ifFalse:[
		    char == escape ifTrue:[
			idx := idx + 1.
			idx > end ifTrue:[
			    "/ mhmh - what should we do here ?
			    this := char
			] ifFalse:[
			    this := aString at:idx.
			]
		    ] ifFalse:[
			this := char
		    ]
		]
	    ]
	].
	this notNil ifTrue:[coll add:this. previous := this].
	idx := idx + 1
    ].

    ^ coll asArray

    "
     String matchScanArrayFrom:'*ute*'
     String matchScanArrayFrom:'**ute**'
     String matchScanArrayFrom:'*uter'
     String matchScanArrayFrom:'\*uter'
     String matchScanArrayFrom:'[cC]#mpute[rR]'
     String matchScanArrayFrom:'[abcd]*'
     String matchScanArrayFrom:'[a-k]*'
     String matchScanArrayFrom:'*some*compl*ern*'
     String matchScanArrayFrom:'[a-'
     String matchScanArrayFrom:'[a-zA-Z]'
     String matchScanArrayFrom:'[a-z01234A-Z]'
    "

    "Modified: 2.4.1997 / 16:20:29 / cg"
! !

!CharacterArray class methodsFor:'queries'!

isAbstract
    ^ self == CharacterArray
! !

!CharacterArray methodsFor:'Compatibility-ANSI'!

addLineDelimiters
    "Ansi compatibility - same as withCRs"

    ^ self withCRs

    "Modified: / 13.11.2001 / 19:16:25 / cg"
! !

!CharacterArray methodsFor:'Compatibility-Dolphin'!

argumentCount
    "same as #numArgs - return the number of arguments a message with myself
     as selector would take."

    ^  self numArgs
!

copyExpanding:expandTable
    "return a copy of myself, with translations from the expandTable sliced in.
     The argument is supposed to map from characters to either characters or strings."

    |ds|

    ds := WriteStream on:(self species new).
    self do:[:eachChar |
	|repl|

	repl := expandTable at:eachChar ifAbsent:nil.
	repl isNil ifTrue:[
	    ds nextPut:eachChar
	] ifFalse:[
	    repl size == 0 ifTrue:[
		ds nextPut:repl
	    ] ifFalse:[
		ds nextPutAll:repl
	    ]
	].
    ].
    ^ ds contents.
!

formatWith:aString
    "same as #bindWith: for dolphin compatibility"

    ^ self bindWith:aString

    "
     'hello%1world' formatWith:'123'
    "
!

formatWith:arg1 with:arg2
    "same as #bindWith: for dolphin compatibility"

    ^ self bindWith:arg1 with:arg2

    "
     'hello%1 %2world' formatWith:'123' with:234
    "
!

formatWith:arg1 with:arg2 with:arg3
    "same as #bindWith: for dolphin compatibility"

    ^ self bindWith:arg1 with:arg2 with:arg3

    "
     'hello%1 %2 %3world' formatWith:'123' with:234 with:345
    "
! !

!CharacterArray methodsFor:'Compatibility-ST/V'!

byteAt:index put:aByte
    "store a byte at given index.
     This is an ST/V compatibility method."

"/    (aByte == 0) ifTrue:[
"/        "store a space instead"
"/        ^ super basicAt:index put:(Character space)
"/    ].
    ^ super at:index put:(Character value:aByte)

    "
     'hello' copy at:1 put:$H asciiValue; yourself
     'hello' copy byteAt:1 put:72; yourself
     'hello' copy byteAt:1 put:0; yourself
    "

    "Modified: 6.5.1996 / 10:35:26 / cg"
!

replChar:oldChar with:newChar
    "return a copy of the receiver, with all oldChars replaced
     by newChar.
     This is an ST/V compatibility method."

    ^ self copyReplaceAll:oldChar with:newChar

    "
     '12345678901234567890' replChar:$0 with:$*
    "

    "Modified: / 18.7.1998 / 22:52:57 / cg"
!

replChar:oldChar withString:newString
    "return a copy of the receiver, with all oldChars replaced
     by newString (i.e. slice in the newString in place of the oldChar).
     This is an ST/V compatibility method."

    |tmpStream|

    tmpStream := WriteStream on:(self class new).
    self do:[:element |
	element = oldChar ifTrue:[
	    tmpStream nextPutAll:newString
	] ifFalse:[
	    tmpStream nextPut:element
	].
    ].
    ^ tmpStream contents

   "
     '12345678901234567890' replChar:$0 withString:'foo'
     'a string with spaces' replChar:$  withString:' foo '
    "
!

replString:subString withString:newString
    "return a copy of the receiver, with all sequences of subString replaced
     by newString (i.e. slice in the newString in place of the oldString)."

    ^ self copyReplaceString:subString withString:newString

   "
     '12345678901234567890' replString:'123' withString:'OneTwoThree'
     '12345678901234567890' replString:'123' withString:'*'
     '12345678901234567890' replString:'234' withString:'foo'

     ('a string with spaces' replChar:$  withString:' foo ')
	replString:'foo' withString:'bar'
    "

    "Modified: / 12-05-2004 / 12:00:27 / cg"
!

trimBlanks
    "return a copy of the receiver without leading
     and trailing spaces.
     This is an ST/V compatibility method."

    ^ self withoutSpaces

    "
     '    spaces at beginning' trimBlanks
     'spaces at end    ' trimBlanks
     '    spaces at beginning and end     ' trimBlanks
     'no spaces' trimBlanks
    "
! !

!CharacterArray methodsFor:'Compatibility-Squeak'!

asDate
    "Many allowed forms, see Date.readFrom:"

    ^ Date readFrom: (ReadStream on: self)

    "
     '30 Apr 1999' asDate dayName capitalized
    "
!

capitalized
    ^ self asUppercaseFirst

    "
     'hello' capitalized
    "
!

displayProgressAt:aPointOrNil from:start to:stop during:aBlock
     ProgressIndicator
	displayProgress:self
	at:aPointOrNil
	from:start
	to:stop
	during:aBlock.

    "
     'dobedobedoobedoo'
	displayProgressAt:(Screen current center)
	from:0 to:100
	during:[:val |
		0 to:100 do:[:i |
		    val value:i.
		    Delay waitForSeconds:0.05.
		]
	       ]
    "
!

endsWithDigit
    "Answer whether the receiver's final character represents a digit.  3/11/96 sw"

    ^ self size > 0 and: [self last isDigit]
!

findTokens:delimiters
    "cg: I am not sure, if this is really the squeak semantics (w.r.t. empty fields)"

    ^ self asCollectionOfSubstringsSeparatedByAny:delimiters

    "
     'a|b#c||e' findTokens:#($# $|)
    "
!

includesSubString:aString
    "return true, if a substring is contained in the receiver.
     The compare is case sensitive."

    ^ self includesString:aString

    "
     'hello world' includesSubString:'Hel'
     'hello world' includesSubString:'hel'
     'hello world' includesSubString:'llo'
    "



!

includesSubstring:aString caseSensitive:caseSensitive
    "return true, if a substring is contained in the receiver.
     The argument, caseSensitive controls if case is ignored in the compare."

    "/ for now,  a q&d hack ...

    caseSensitive ifFalse:[
	^ self asLowercase includesString:aString asLowercase
    ].
    ^ self includesString:aString

    "
     'hello world' includesSubstring:'Hel' caseSensitive:true
     'hello world' includesSubstring:'Hel' caseSensitive:false
    "



!

isAllDigits
    "Answer whether the receiver's characters are all digits"

    ^ self conform:[:eachChar | eachChar isDigit]

    "
     'hello world' isAllDigits
     '12344' isAllDigits
    "
!

lastSpacePosition
    ^ self lastIndexOfSeparator
!

padded:leftOrRight to:paddedSize with:padCharacter
    leftOrRight == #left ifTrue:[
	^ self leftPaddedTo:paddedSize with:padCharacter
    ].
    ^ self paddedTo:paddedSize with:padCharacter

    "
     'hello' padded:#right to:10 with:$.
     'hello' padded:#left to:10 with:$.
    "
!

skipDelimiters:delimiters startingAt:start
    "Answer the index of the character within the receiver, starting at start,
     that does NOT match one of the delimiters.
     If the receiver does not contain any of the delimiters, answer size + 1.
     Assumes the delimiters to be a non-empty string."

    start to:self size do:[:i |
	delimiters detect:[:delim | delim = (self at:i) ] ifNone:[ ^ i ]
    ].
    ^ self size + 1

    "
     '123***7890' skipDelimiters:'*' startingAt:4
     '123***7890' skipDelimiters:'*' startingAt:3
     '123***7890' skipDelimiters:'*' startingAt:10
     '123*******' skipDelimiters:'*' startingAt:10
    "
!

substrings
    ^ self asCollectionOfWords

    "
     'foo bar baz' substrings
    "
!

truncateTo:smallSize
    "return myself or a copy shortened to smallSize.  1/18/96 sw"

    self size <= smallSize ifTrue:[^ self].
    ^ self copyFrom: 1 to: smallSize

    "
     'hello world' truncateTo:5
     'hello' truncateTo:10

     'hello world' copyTo:5
     'hello' copyTo:10
    "
!

withBlanksTrimmed
    "Return a copy of the receiver from which leading and trailing blanks have been trimmed."

    ^ self withoutSpaces

    "
     '  hello    world    ' withBlanksTrimmed
    "



!

withNoLineLongerThan: aNumber
    "Answer a string with the same content as receiver, but rewrapped so that no line has more characters than the given number"

    | listOfLines currentLast currentStart resultString putativeLast putativeLine crPosition |

    aNumber isNumber not | (aNumber < 1) ifTrue: [self error: 'too narrow'].
    listOfLines _ OrderedCollection new.
    currentLast _ 0.
    [currentLast < self size] whileTrue:
	    [currentStart _ currentLast + 1.
	    putativeLast _ (currentStart + aNumber - 1) min: self size.
	    putativeLine _ self copyFrom: currentStart to: putativeLast.
	    (crPosition _ putativeLine indexOf: Character cr) > 0 ifTrue:
		    [putativeLast _ currentStart + crPosition - 1.
		    putativeLine _ self copyFrom: currentStart to: putativeLast].
	    currentLast _ putativeLast == self size
		    ifTrue:
			    [putativeLast]
		    ifFalse:
			    [currentStart + putativeLine lastSpacePosition - 1].
	    currentLast <= currentStart ifTrue:
		    ["line has NO spaces; baleout!!"
		    currentLast _ putativeLast].
	    listOfLines add: (self copyFrom: currentStart to: currentLast) withBlanksTrimmed].

    listOfLines size > 0 ifFalse: [^ ''].
    resultString _ listOfLines first.
    2 to: listOfLines size do:
	    [:i | resultString _ resultString, Character cr asString, (listOfLines at: i)].
    ^ resultString

    "
     #(5 7 20) collect:
	[:i | 'Fred the bear went down to the brook to read his book in silence' withNoLineLongerThan: i]
    "



! !

!CharacterArray methodsFor:'Compatibility-V''Age'!

addLineDelimiter
    "replace all '\'-characters by line delimiter (cr) - characters.
     This has been added for VisualAge compatibility."

    ^ self withCRs
!

bindWith:aString
    "return a copy of the receiver, where a '%1' escape is
     replaced by aString.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:aString)

    "
     'do you like %1 ?' bindWith:'smalltalk'
    "
!

bindWith:string1 with:string2
    "return a copy of the receiver, where a '%1' escape is
     replaced by string1 and '%2' is replaced by string2.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:string1 with:string2)

    "
     'do you prefer %1 or rather %2 ?'
	bindWith:'smalltalk' with:'c++'
    "
!

bindWith:str1 with:str2 with:str3
    "return a copy of the receiver, where a '%1', '%2' and '%3' escapes
     are replaced by str1, str2 and str3 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3)

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWith:'smalltalk' with:'c++' with:'c'
    "
!

bindWith:str1 with:str2 with:str3 with:str4
    "return a copy of the receiver, where a '%1', '%2', '%3' and '%4' escapes
     are replaced by str1, str2, str3 and str4 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3 with:str4)

    "
     'do you prefer %1 or rather %2 (not talking about %3 or even %4) ?'
	bindWith:'smalltalk' with:'c++' with:'c' with:'assembler'
    "
!

bindWith:str1 with:str2 with:str3 with:str4 with:str5
    "return a copy of the receiver, where a '%1' .. '%5' escapes
     are replaced by str1 .. str5 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3 with:str4 with:str5)

    "Created: 31.1.1997 / 16:25:42 / cg"
!

bindWith:str1 with:str2 with:str3 with:str4 with:str5 with:str6
    "return a copy of the receiver, where a '%1' .. '%6' escapes
     are replaced by str1 .. str5 respectively.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2
					 with:str3 with:str4
					 with:str5 with:str6)

!

bindWithArguments:anArrayOfStrings
    "return a copy of the receiver, where a '%i' escape
     is replaced by the coresponding string from the argument array.
     'i' may be between 1 and 9 (i.e. a maximum of 9 placeholders is allowed).
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:anArrayOfStrings

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWithArguments:#('smalltalk' 'c++' 'c')
    "
!

subStrings
    "return an array consisting of all words contained in the receiver.
     Words are separated by whitespace.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfWords

    "
     'hello world, this is smalltalk' subStrings
    "
!

subStrings:separatorCharacter
    "return an array consisting of all words contained in the receiver.
     Words are separated by separatorCharacter.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfSubstringsSeparatedBy:separatorCharacter

    "
     'foo:bar:baz:smalltalk' subStrings:$:
    "
!

trimSeparators
    "return a copy of the receiver without leading and trailing whiteSpace"

    ^ self withoutSeparators
! !

!CharacterArray methodsFor:'Compatibility-VW'!

asLogicalFileSpecification
    ^ self asFilename

    "Created: / 30.10.2001 / 17:29:53 / cg"
!

asQualifiedReference
    ^ BindingReference pathString:(self string)

!

expandMacros
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:#()

    "
     'hello<n>foo' expandMacros
    "

    "Modified: / 18.6.1998 / 16:03:02 / cg"
!

expandMacrosWith:arg
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:(Array with:arg)

    "Created: / 1.11.1997 / 13:01:28 / cg"
    "Modified: / 1.11.1997 / 13:30:50 / cg"
!

expandMacrosWith:arg1 with:arg2
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:(Array with:arg1 with:arg2)

    "Modified: / 6.7.1998 / 21:58:14 / cg"
!

expandMacrosWith:arg1 with:arg2 with:arg3
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     Read the comment in #expandMacrosWithArguments: about
     limited compatibility issues."

    ^ self expandMacrosWithArguments:(Array with:arg1 with:arg2 with:arg3)
!

expandMacrosWithArguments:argArray
    "ST80 compatibility - expand '<..>' macros with
     argument strings. Similar to #bindWith:.
     WARNING: possibly not all ST80 expansions are supported here."

    "/ supported expansions:
    "/
    "/   <#p>       # is arg Number; slice in the args printString
    "/   <#s>       # is arg Number; slice in the arg itself (must know asString)
    "/   <#?s1:s2>  # is arg Number; slice in s1 if the arg is true, s2 otherwise
    "/              use first arg if # is not given (i.e. no number before s,p ...)
    "/   <n>        replace by a newLine character
    "/   <t>        replace by a tab character
    "/   %X         the X character itself

    |in out c fmt nr arg s1 s2|

    in := self readStream.
    out := WriteStream on:(String uninitializedNew:self size).

    [in atEnd] whileFalse:[
	c := in next.
	c == $% ifTrue:[
	    c := in next.
	    out nextPut:c
	] ifFalse:[
	    c == $< ifTrue:[
		[in peek == $<] whileTrue:[
		    out nextPut:$<.
		    in next.
		].
		in peek == $n ifTrue:[
		    out nextPut:Character cr.
		    in next
		] ifFalse:[in peek == $t ifTrue:[
		    out nextPut:Character tab.
		    in next
		] ifFalse:[
		    in peek isDigit ifFalse:[
			nr := 1
		    ] ifTrue:[
			"/ start an argument expansion ...
			nr := Integer readFrom:in onError:nil.
			nr isNil ifTrue:[
			    "/ what does VW do here ?
			    self error:'invalid format' mayProceed:true.
			    ^ self
			].
			(nr between:1 and:argArray size) ifFalse:[
			    "/ what does VW do here ?
			    self error:'invalid format - bad argNr' mayProceed:true.
			    ^ self
			].
		    ].
		    arg := argArray at:nr.

		    fmt := in next.
		    (fmt == $p) ifTrue:[
			"/ expand with args printString
			out nextPutAll:arg printString.
		    ] ifFalse:[ (fmt == $s) ifTrue:[
			"/ expand with arg itself
			arg isText ifTrue:[
			    out := (WriteStream on:(Text new)) nextPutAll:(out contents); yourself.
			    out nextPutAll:arg asText.
			] ifFalse:[
			    out nextPutAll:arg asString string.
			]
		    ] ifFalse:[ (fmt == $?) ifTrue:[
			s1 := in upTo:$:.
			s2 := in nextUpTo:$>.
			arg ifTrue:[
			    out nextPutAll:s1
			] ifFalse:[
			    out nextPutAll:s2
			].
		    ] ifFalse:[
			"/ what does VW do here ?
			self error:'invalid format' mayProceed:true.
			^ self
		    ]]].
		]].
		c := in next.
		c ~~ $> ifTrue:[
		    "/ what does VW do here ?
		    self error:'invalid format' mayProceed:true.
		    ^ self
		]
	    ] ifFalse:[
		out nextPut:c
	    ]
	].
    ].
    ^ out contents

    "
     'hello <1s> how are you' expandMacrosWith:(OperatingSystem getLoginName)
     'one plus one is <1p>' expandMacrosWith:2
    "

    "Modified: / 18.6.1998 / 16:04:46 / cg"
!

isCharacters
    "added for visual works compatibility"
    ^ true
! !

!CharacterArray methodsFor:'JavaScript support'!

+ aStringOrCharacter
    "alternative string-concatenation.
     For JavaScript only"

    ^ self , aStringOrCharacter

    "
     'hello' + $1  
     'hello' + '1' 
    "
! !

!CharacterArray methodsFor:'character searching'!

includesMatchCharacters
    "return true if the receiver includes any meta characters (i.e. $* or $#)
     for match operations; false if not.
     Here, do not care for $\ escapes"

    ^ self includesAny:'*#['

    "
     '*foo' includesMatchCharacters
     '\*foo' includesMatchCharacters
     '\*foo' includesUnescapedMatchCharacters
     '*foo' includesMatchCharacters
     '\\*foo' includesMatchCharacters
     'foo*' includesMatchCharacters
     'foo\*' includesMatchCharacters
     'foo\' includesMatchCharacters
    "

    "Modified: 2.4.1997 / 18:12:34 / cg"
!

includesSeparator
    "return true, if the receiver contains any whitespace characters"

    ^ (self indexOfSeparator ~~ 0)

    "
     'hello world' includesSeparator
     'helloworld' includesSeparator
    "
!

includesUnescapedMatchCharacters
    "return true if the receiver really includes any meta characters (i.e. $* or $#)
     for match operations; false if not.
     Here, care for $\ escapes"

    |idx sz specialChars escape|

    idx := 1.
    sz := self size.
    specialChars := '*#[\'.
    (escape := self class matchEscapeCharacter) ~~ $\ ifTrue:[
	specialChars := specialChars copy.
	specialChars at:specialChars size put:escape
    ].

    [true] whileTrue:[
	idx := self indexOfAny:specialChars startingAt:idx.
	idx == 0 ifTrue:[^ false].
	(self at:idx) == escape ifFalse:[^ true].
	idx := idx + 2.
	idx > sz ifTrue:[^ false].
    ].

    "
     '*foo' includesUnescapedMatchCharacters
     '\*foo' includesUnescapedMatchCharacters
     '\\foo' includesUnescapedMatchCharacters
     '\\\$foo' includesUnescapedMatchCharacters
     '*foo' includesUnescapedMatchCharacters
     '\\*foo' includesUnescapedMatchCharacters
     'foo*' includesUnescapedMatchCharacters
     'foo\*' includesUnescapedMatchCharacters
     'foo\' includesUnescapedMatchCharacters
    "

    "Modified: 2.4.1997 / 17:08:52 / cg"
    "Created: 2.4.1997 / 17:23:26 / cg"
!

indexOfControlCharacterStartingAt:startIndex
    "return the index of the next control character;
     starting the search at startIndex, searching forward;
     that is a character with asciiValue < 32.
     Return 0 if none is found."

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
	(self at:index) isControlCharacter ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' asTwoByteString            indexOfControlCharacterStartingAt:1
     'hello world\foo' withCRsasTwoByteString indexOfControlCharacterStartingAt:1
    "

    "Modified: / 21.7.1998 / 17:25:07 / cg"
!

indexOfNonSeparator
    "return the index of the first non-whitespace character.
     return 0 if no non-separator was found"

    ^ self indexOfNonSeparatorStartingAt:1.

    "
     '    hello world' indexOfNonSeparator
     '    ' indexOfNonSeparator
     'a   ' indexOfNonSeparator
     'abc' indexOfNonSeparator
     ' ' indexOfNonSeparator
     '' indexOfNonSeparator
    "
!

indexOfNonSeparatorStartingAt:startIndex
    "return the index of the next non-whitespace character,
     starting the search at startIndex, searching forward;
     return 0 if no non-separator was found"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
	(self at:index) isSeparator ifFalse:[^ index]
    ].
    ^ 0

    "
     '    hello world' indexOfNonSeparatorStartingAt:1
     '    ' indexOfNonSeparatorStartingAt:1
     'a   ' indexOfNonSeparatorStartingAt:2
    "

    "
     |s index1 index2|
     s := '   foo    bar      baz'.
     index1 := s indexOfNonSeparatorStartingAt:1.
     index2 := s indexOfSeparatorStartingAt:index1.
     s copyFrom:index1 to:index2 - 1
    "
!

indexOfSeparator
    "return the index of the first whitespace character;
     starting the search at the beginning, searching forward;
     return 0 if no separator was found"

    ^ self indexOfSeparatorStartingAt:1

    "
     'hello world' indexOfSeparator
     'helloworld' indexOfSeparator
     'hello   ' indexOfSeparator
     '   hello' indexOfSeparator
    "
!

indexOfSeparatorStartingAt:startIndex
    "return the index of the next whitespace character,
     starting the search at startIndex, searching forward;
     return 0 if no separator was found"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
	(self at:index) isSeparator ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' indexOfSeparatorStartingAt:3
     ' hello world' indexOfSeparatorStartingAt:3
     'hello world ' indexOfSeparatorStartingAt:3
     'hello world ' indexOfSeparatorStartingAt:6
     'hello world ' indexOfSeparatorStartingAt:7
     'helloworld ' indexOfSeparatorStartingAt:7
     'helloworld' indexOfSeparatorStartingAt:7
    "
!

lastIndexOfSeparator
    "return the last index of the whitespace character.
     (i.e. start the search at the end and search backwards);
     Returns 0 if no separator is found."

    ^ self lastIndexOfSeparatorStartingAt:(self size)

    "
     'hello world' lastIndexOfSeparator
     'helloworld' lastIndexOfSeparator
     'hel lo wor ld' lastIndexOfSeparator
     'hel   ' lastIndexOfSeparator 6
    "
!

lastIndexOfSeparatorStartingAt:startIndex
    "return the index of the previous whitespace character,
     starting the search at startIndex (and searching backwards);
     returns 0 if no separator was found"

    |start  "{ Class: SmallInteger }"|

    start := startIndex.

    start to:1 by:-1 do:[:index |
	(self at:index) isSeparator ifTrue:[^ index]
    ].
    ^ 0

    "
     'hello world' lastIndexOfSeparatorStartingAt:3
     'hello world' lastIndexOfSeparatorStartingAt:7
     'helloworld' lastIndexOfSeparatorStartingAt:7
     ' helloworld' lastIndexOfSeparatorStartingAt:7
    "
! !

!CharacterArray methodsFor:'comparing'!

< aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     This comparison is based on the elements ascii code -
     i.e. upper/lowercase & upper/lowercase & national characters are NOT treated specially."

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }"
     n         "{ Class: SmallInteger }"
     c1 c2|

    mySize := self size.
    otherSize := aString string size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	(c1 == c2 or:[c1 = c2]) ifFalse:[^ c1 < c2].
    ].
    ^ mySize < otherSize
!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.

     This compare does NOT ignore case differences,
     therefore 'foo' = 'Foo' will return false.
     Since this is incompatible to ST-80 (at least, V2.x) , this may change."

    |mySize    "{ Class: SmallInteger }"
     otherSize |

    (aString isString or:[aString species == self species]) ifFalse:[
	^ false
    ].
    mySize := self size.
    otherSize := aString size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
	(self at:index) = (aString at:index) ifFalse:[^ false].
    ].
    ^ true

    "
     'foo' = 'Foo'
     'foo' = 'bar'
     'foo' = 'foo'
     'foo' = 'foo' asText
     'foo' asText = 'foo'
     'foo' asText = 'foo' asText
    "

    "Modified: 22.4.1996 / 15:53:58 / cg"
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     This comparison is based on the elements ascii code -
     i.e. upper/lowercase & upper/lowercase & national characters are NOT treated specially."

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }"
     n         "{ Class: SmallInteger }"
     c1 c2|

    mySize := self size.
    otherSize := aString string size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	(c1 == c2 or:[c1 = c2]) ifFalse:[^ c1 > c2].
    ].
    ^ mySize > otherSize

    "Modified: 22.4.1996 / 15:55:00 / cg"
!

compareCaselessWith:aString
    "Compare the receiver against the argument, ignoreing case.
     Return 1 if the receiver is greater, 0 if equal and -1 if less than the argument.

     This comparison is based on the elements ascii code -
     i.e. national characters are NOT treated specially.
     'foo' compareWith: 'Foo' will return 0"

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }"
     n         "{ Class: SmallInteger }"
     c1 c2|

    mySize := self size.
    otherSize := aString string size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
	c1 := (self at:index) asLowercase.
	c2 := (aString at:index) asLowercase.
	c1 > c2 ifTrue:[^ 1].
	c1 < c2 ifTrue:[^ -1].
    ].
    mySize > otherSize ifTrue:[^ 1].
    mySize < otherSize ifTrue:[^ -1].
    ^ 0

    "Modified: 22.4.1996 / 15:56:07 / cg"
!

compareWith:aString
    "Compare the receiver with the argument and return 1 if the receiver is
     greater, 0 if equal and -1 if less than the argument.
     This comparison is based on the elements ascii code -
     i.e. upper/lowercase & national characters are NOT treated specially.
     'foo' compareWith: 'Foo' will return 1.
     while 'foo' sameAs:'Foo' will return true"

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }"
     n         "{ Class: SmallInteger }"
     c1 c2|

    mySize := self size.
    otherSize := aString string size.
    n := mySize min:otherSize.

    1 to:n do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	c1 > c2 ifTrue:[^ 1].
	c1 < c2 ifTrue:[^ -1].
    ].
    mySize > otherSize ifTrue:[^ 1].
    mySize < otherSize ifTrue:[^ -1].
    ^ 0

    "Modified: 22.4.1996 / 15:56:07 / cg"
!

hash
    "return an integer useful as a hash-key"

%{  /* NOCONTEXT */

    REGISTER unsigned g, val;
    REGISTER unsigned char *cp, *cp0;
    int l;

    cp = __stringVal(self);
    l = __stringSize(self);
    if (__qClass(self) != @global(String)) {
	int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(__qClass(self))->c_ninstvars));

	cp += n;
	l -= n;
    }

    /*
     * this is the dragon-book algorithm
     */

    val = 0;
    switch (l) {
    default:
	for (cp0 = cp, cp += l - 1; cp >= cp0; cp--) {
	    val = (val << 4) + *cp;
	    if (g = (val & 0xF0000000)) {
		val ^= g >> 24;
		val ^= g;
	    }
	}
	break;
    case 7:
	val = cp[6] << 4;
    case 6:
	val = (val + cp[5]) << 4;
    case 5:
	val = (val + cp[4]) << 4;
    case 4:
	val = (val + cp[3]) << 4;
    case 3:
	val = (val + cp[2]) << 4;
    case 2:
	val = (val + cp[1]) << 4;
    case 1:
	val = val + cp[0];
    case 0:
	break;
    }

    /*
     * multiply by large prime to spread values
     * This speeds up Set and Dictionary by a factor of 10!
     */
    val *= 31415821;
    RETURN ( __mkSmallInteger(val & 0x3fffffff));
%}

!

sameAs:aString
    "Compare the receiver with the argument like =, but ignore case differences.
     Return true or false."

    |mySize "{ Class: SmallInteger }"
     otherSize c1 c2|

    self == aString ifTrue:[^ true].

    mySize := self size.
    otherSize := aString string size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	c1 == c2 ifFalse:[
	    (c1 sameAs:c2) ifFalse:[^ false].
	]
    ].
    ^ true

    "
     'foo' sameAs: 'Foo'
     'foo' sameAs: 'bar'
     'foo' sameAs: 'foo'
    "

    "Modified: 22.4.1996 / 15:56:17 / cg"
!

sameAs:aString ignoreCase:ignoreCase
    "Compare the receiver with the argument.
     If ignoreCase is true, this is the same as #sameAs:,
     if false, this is the same as #=."

    ignoreCase ifTrue:[
	^ self sameAs:aString
    ].
    ^ self = aString

    "
     'foo' sameAs:'Foo' ignoreCase:false
     'foo' sameAs:'foo' ignoreCase:true
    "

!

sameCharacters:aString
    "count & return the number of characters which are the same
     (ignoring case and emphasis) in the receiver and the argument, aString."

    |n "{ Class: SmallInteger }"
     c1 c2 cnt|

    n := self size.
    n := n min:(aString string size).

    cnt := 0.
    1 to:n do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	((c1 == c2)
	or:[c1 asLowercase = c2 asLowercase]) ifTrue:[
	    cnt := cnt + 1
	]
    ].
    ^ cnt

    "
     'foobarbaz' sameCharacters: 'foo'
     'foobarbaz' sameCharacters: 'Foo'
     'foobarbaz' sameCharacters: 'baz'
    "
!

sameEmphasisAs:aStringOrText
    "compare the receivers and the arguments emphasis"

    ^ self emphasis = aStringOrText emphasis

    "
     'hello' asText sameEmphasisAs: 'hello'
     'hello' asText sameEmphasisAs: 'hello' asText
     'hello' asText allBold sameEmphasisAs: 'hello'
     'hello' asText allBold sameEmphasisAs: 'fooba' asText allBold
     'hello' asText allBold sameEmphasisAs: 'fooba' asText allItalic
    "


!

sameStringAndEmphasisAs:aStringOrText
    "compare both emphasis and string of the receiver and the argument"

    aStringOrText isString ifFalse:[^ false].
    (self string = aStringOrText string) ifFalse:[^ false].
    self hasChangeOfEmphasis = aStringOrText hasChangeOfEmphasis ifFalse:[^ false].
    ^ self emphasis = aStringOrText emphasis

    "
     'hello' asText sameEmphasisAs: 'hello'
     'hello' asText sameEmphasisAs: 'hello' asText
     'hello' asText allBold sameEmphasisAs: 'hello'
     'hello' asText allBold sameEmphasisAs: 'fooba' asText allBold
     'hello' asText allBold sameEmphasisAs: 'fooba' asText allItalic

     'hello' sameEmphasisAs: 'hello' asText
     'hello' sameEmphasisAs: 'hello' asText allBold
     'hello' sameEmphasisAs: 'fooba'
     'hello' sameEmphasisAs: 'fooba' asText
     'hello' sameEmphasisAs: 'fooba' asText allBold
     'hello' sameEmphasisAs: 'fooba' asText allItalic

     'hello' asText sameStringAndEmphasisAs: 'hello'
     'hello' asText sameStringAndEmphasisAs: 'hello' asText
     'hello' asText allBold sameStringAndEmphasisAs: 'hello'
     'hello' asText allBold sameStringAndEmphasisAs: 'fooba' asText allBold
     'hello' asText allBold sameStringAndEmphasisAs: 'fooba' asText allItalic

     'hello' sameStringAndEmphasisAs: 'hello' asText
     'hello' sameStringAndEmphasisAs: 'hello' asText allBold
     'hello' sameStringAndEmphasisAs: 'fooba'
     'hello' sameStringAndEmphasisAs: 'fooba' asText
     'hello' sameStringAndEmphasisAs: 'fooba' asText allBold
     'hello' sameStringAndEmphasisAs: 'fooba' asText allItalic
    "
! !

!CharacterArray methodsFor:'converting'!

asArrayOfSubstrings
    "return an array of substrings from the receiver, interpreting
     separators (i.e. spaces & newlines) as word-delimiters.
     This is a compatibility method - the actual work is done in
     asCollectionOfWords."

    ^ self asCollectionOfWords asArray

    "
     '1 one two three four 5 five' asArrayOfSubstrings
     '1
one
	two three four 5 five' asArrayOfSubstrings
    "
!

asCollectionOfLines
    "return a collection containing the lines (separated by cr)
     of the receiver. If multiple cr's occur in a row, the result will
     contain empty strings."

    ^ self asCollectionOfSubstringsSeparatedBy:Character cr

    "
     '1 one\2 two\3 three\4 four\5 five' withCRs asCollectionOfLines
     '1 one\\\\2 two\3 three' withCRs asCollectionOfLines
    "
!

asCollectionOfSubstringsSeparatedBy:aCharacter
    "return a collection containing the lines (separated by aCharacter)
     of the receiver. If aCharacter occurs multiple times in a row,
     the result will contain empty strings."

    ^ self asCollectionOfSubCollectionsSeparatedBy:aCharacter

    "
     '1 one:2 two:3 three:4 four:5 five' withCRs asCollectionOfSubstringsSeparatedBy:$:
     '1 one 2 two 3 three 4 four 5 five' withCRs asCollectionOfSubstringsSeparatedBy:Character space
    "
!

asCollectionOfSubstringsSeparatedByAll:aSeparatorString
    "return a collection containing the lines (separated by aSeparatorString)
     of the receiver. If aSeparatorString occurs multiple times in a row,
     the result will contain empty strings."

    ^ self asCollectionOfSubCollectionsSeparatedByAll:aSeparatorString

    "
     '1::2::3::4::5::' asCollectionOfSubstringsSeparatedByAll:'::'
    "
!

asCollectionOfSubstringsSeparatedByAny:aCollectionOfSeparators
    "return a collection containing the words (separated by any character
     from aCollectionOfSeparators) of the receiver.
     This allows breaking up strings using any character as separator."

    ^ self asCollectionOfSubCollectionsSeparatedByAny:aCollectionOfSeparators

    "
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:#($:)
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:':'
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:(Array with:$: with:Character space)
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:': '
     'h1e2l3l4o' asCollectionOfSubstringsSeparatedByAny:($1 to: $9)
    "
!

asCollectionOfWords
    "return a collection containing the words (separated by whitespace)
     of the receiver. Multiple occurrences of whitespace characters will
     be treated like one - i.e. whitespace is skipped."

    |words|

    words := OrderedCollection new.
    self asCollectionOfWordsDo:[:w | words add:w].
    ^ words

    "
     'hello world isnt this nice' asCollectionOfWords
     '    hello    world   isnt   this   nice  ' asCollectionOfWords
     'hello' asCollectionOfWords
     '' asCollectionOfWords
     '      ' asCollectionOfWords
    "
!

asCollectionOfWordsDo:aBlock
    "evaluate aBlock for each word (separated by whitespace) of the receiver.
     Multiple occurrences of whitespace characters will be treated like one
     - i.e. whitespace is skipped.
     Returns the number of words (i.e. the number of invocations of aBlock)."

    |count  "{ Class:SmallInteger }"
     start  "{ Class:SmallInteger }"
     stop   "{ Class:SmallInteger }"
     mySize "{ Class:SmallInteger }"|

    count := 0.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	start := self indexOfNonSeparatorStartingAt:start.
	start == 0 ifTrue:[
	    ^ count
	].
	stop := self indexOfSeparatorStartingAt:start.
	stop == 0 ifTrue:[
	    aBlock value:(self copyFrom:start to:mySize).
	    ^ count + 1
	].
	aBlock value:(self copyFrom:start to:(stop - 1)).
	start := stop.
	count := count + 1
    ].
    ^ count

    "
     'hello world isnt this nice' asCollectionOfWordsDo:[:w | Transcript showCR:w]
     '    hello    world   isnt   this   nice  ' asCollectionOfWordsDo:[:w | Transcript showCR:w]
     'hello' asCollectionOfWordsDo:[:w | Transcript showCR:w]
     '' asCollectionOfWordsDo:[:w | Transcript showCR:w]
     '      ' asCollectionOfWordsDo:[:w | Transcript showCR:w]
    "
!

asComposedText
    "ST-80 compatibility
     - ST/X does not (as today) support composedTexts."

    ^ ComposedText fromString:self string

    "Modified: 27.4.1996 / 13:30:30 / cg"
!

asFilename
    "return a Filename with pathname taken from the receiver"

    ^ Filename named:self "(self asSingleByteStringReplaceInvalidWith:$?)"

    "Modified: 20.5.1996 / 09:38:15 / cg"
!

asFixedPoint
    "read a fixedPoint number from the receiver.
     Notice, that errors may occur during the read,
     so you better setup some signal handler when using this method."

    ^ FixedPoint readFromString:self

    "
     '0.123' asFixedPoint
     '12345' asFixedPoint
     '(1/5)' asFixedPoint
     'foo' asFixedPoint
     Object errorSignal handle:[:ex | ex return:0] do:['foo' asFixedPoint]
    "

    "Modified: / 25.10.1997 / 15:19:00 / cg"
!

asFixedPoint:scale
    "read a fixedPoint number with scale number of post-decimal digits
     from the receiver. Scale controls the number of displayed digits,
     not the number of actually valid digits.
     Notice, that errors may occur during the read,
     so you better setup some signal handler when using this method."

    ^ (FixedPoint readFromString:self) scale:scale

    "
     '0.123' asFixedPoint:2
     '123456' asFixedPoint:2
     ('3.14157' asFixedPoint:1) asFixedPoint:5
     '3.14157' asFixedPoint:2
     'foo' asFixedPoint:2
    "

    "Modified: / 25.10.1997 / 15:21:57 / cg"
!

asFloat
    "read a float number from the receiver.
     Notice, that errors may occur during the read,
     so you better setup some exception handler when using this method."

    ^ (Number readFromString:self) asFloat

    "
     '0.123' asFloat
     '12345' asFloat
     '(1/5)' asFloat
     Object errorSignal handle:[:ex | ex return:0] do:['foo' asFloat]
    "
!

asInteger
    "read an integer from the receiver.
     Notice, that errors may occur during the read,
     so you better setup some exception handler when using this method."

    ^ Integer readFromString:self

    "
     '12345678901234567890' asInteger
     '-1234' asInteger
     '0.123' asInteger   <- reader stops at ., returning 0 here
     '0.123' asNumber    <- returns what you expect
     Object errorSignal handle:[:ex | ex return:0] do:['foo' asInteger]
    "
!

asLowercase
    "return a copy of myself in lowercase letters"

    |newStr c bitsPerCharacter
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    bitsPerCharacter := newStr bitsPerCharacter.

    1 to:mySize do:[:i |
	c := (self at:i) asLowercase.
	c bitsPerCharacter > bitsPerCharacter ifTrue:[
	    newStr := c stringSpecies fromString:newStr.
	].
	newStr at:i put:c
    ].
    ^ newStr

    "
     'HelloWorld' asLowercase
     'HelloWorld' asLowercaseFirst
    "
!

asLowercaseFirst
    "return a copy of myself where the first character is converted to lowercase."

    |newString firstChar firstCharAsLowercase|

    firstChar := (self at:1).
    firstCharAsLowercase := firstChar asLowercase.
    firstChar == firstCharAsLowercase ifTrue:[ ^ self].

    firstCharAsLowercase bitsPerCharacter > self bitsPerCharacter ifTrue:[
	newString := firstCharAsLowercase stringSpecies fromString:self.
    ] ifFalse:[
	newString := self stringSpecies fromString:self.
    ].
    newString at:1 put:firstCharAsLowercase.
    ^ newString

    "
     'HelloWorld' asLowercase
     'HelloWorld' asLowercaseFirst
    "
!

asNumber
    "read a number from the receiver.
     Notice, that (in contrast to ST-80) errors may occur during the read,
     so you better setup some signal handler when using this method.
     This may change if ANSI specifies it."

"/ ST-80 behavior:
"/  ^ Number readFromString:self onError:0

    ^ Number readFromString:self

    "
     '123'     asNumber
     '123.567' asNumber
     '(5/6)'   asNumber
     'foo'     asNumber
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asNumber]
    "
!

asNumberFromFormatString:ignored
    "read a number from the receiver, ignoring any nonDigit characters.
     This is typically used to convert from strings which include
     dollar-signs or millenium digits. However, this method also ignores
     the decimal point (if any) and therefore should be used with care."

    |tempString|

    tempString := self collect:[:char | char isDigit].
    ^ Number readFromString:tempString onError:0

    "
     'USD 123' asNumberFromFormatString:'foo'
     'DM 123'  asNumberFromFormatString:'foo'
     '123'     asNumberFromFormatString:'foo'
     '123.567' asNumberFromFormatString:'foo'
     '(5/6)'   asNumberFromFormatString:'foo'
     'foo'     asNumberFromFormatString:'foo'
    "
!

asOneByteString
    "return the receiver converted to a 'normal' string.
     Same as asSingleByteString - for compatibility."

    ^ self asSingleByteString
!

asSingleByteString
    "return the receiver converted to a 'normal' string.
     Raises an error if unrepresentable characters are encountered."

    ^ String fromString:self
!

asSingleByteStringIfPossible
    "if possible, return the receiver converted to a 'normal' string.
     It is only possible, if there are no characters with codePoints above 255 in the receiver."

    self bitsPerCharacter == 8 ifTrue:[^ self].
    (self contains:[:char | char codePoint > 255]) ifFalse:[^ self asSingleByteString].
    ^ self

    "
     'hello' asSingleByteStringIfPossible
     'hello' asUnicodeString asSingleByteStringIfPossible
    "
!

asSingleByteStringReplaceInvalidWith:replacementCharacter
    "return the receiver converted to a 'normal' string,
     with invalid characters replaced by replacementCharacter.
     Can be used to convert from 16-bit strings to 8-bit strings
     and replace characters above code-255 with some replacement."

    |newString|

    newString := String new:(self size).
    1 to:self size do:[:idx |
	|char|

	char := self at:idx.
	char codePoint <= 16rFF ifTrue:[
	    newString at:idx put:char
	] ifFalse:[
	    newString at:idx put:replacementCharacter
	].
    ].
    ^ newString

    "Created: 30.6.1997 / 13:02:14 / cg"
!

asSoundexCode
    "return a soundex string or nil.
     Soundex returns similar codes for similar sounding words, making it a useful
     tool when searching for words where the correct spelling is unknown.
     (read Knuth or search the web if you dont know what a soundex code is).
     Caveat: 'similar sounding words' means: 'similar sounding in english'."

    |s d ch last lch n codes sc|

    s := self readStream.
    s skipSeparators.
    s atEnd ifTrue:[
	^ nil
    ].
    ch := s next.
    ch isLetter ifFalse:[
	^ nil
    ].
    n := 0.

    codes := Dictionary new.
    codes atAll:'bpfv'     put:$1.
    codes atAll:'cskgjqxz' put:$2.
    codes atAll:'dt'       put:$3.
    codes at:$l put:$4.
    codes atAll:'nm'       put:$5.
    codes at:$r put:$6.

    d := String writeStream.
    d nextPut:(ch asUppercase).

    [s atEnd] whileFalse:[
	ch := s next.
	lch := ch asLowercase.
	lch = last ifFalse:[
	    last := lch.

	    sc := codes at:ch ifAbsent:nil.
	    sc notNil ifTrue:[
		n < 3 ifTrue:[
		    d nextPut:sc.
		    n := n + 1.
		]
	    ] ifFalse:[
"/                ch isLetter ifFalse:[
"/                    "/ something else - ignore it
"/                ] ifTrue:[
"/                    "/ else its a vowel and we ignore it
"/                ]
	    ].
	]
    ].
    [ n < 3 ] whileTrue:[
	d nextPut:$0.
	n := n + 1.
    ].

    ^ d contents

    "
     'claus' asSoundexCode
     'clause' asSoundexCode
     'close' asSoundexCode
     'smalltalk' asSoundexCode
     'smaltalk' asSoundexCode
     'smaltak' asSoundexCode
     'smaltok' asSoundexCode
     'smoltok' asSoundexCode
     'aa' asSoundexCode
     'by' asSoundexCode
     'bab' asSoundexCode
     'bob' asSoundexCode
     'bop' asSoundexCode
    "
!

asString
    "return myself - I am a string"

    ^ self
!

asStringCollection
    "return a collection of lines from myself."

    ^ StringCollection fromString:self "string"

    "Modified: 13.5.1996 / 20:36:59 / cg"
!

asSymbol
    "return a unique symbol with the name taken from my characters.
     The receiver must be a singleByte-String.
     TwoByte- and FourByteSymbols are (currently ?) not allowed."

    |str|

    str := self string.
    str ~~ self ifTrue:[ ^ str asSymbol ].
    ^ self asSingleByteString asSymbol
!

asSymbolIfInterned
    "if a symbol with the receivers characters is already known, return it.
     Otherwise, return nil. This can be used to query for an existing
     symbol and is the same as
	self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil]
     but slightly faster, since the symbol lookup operation is only performed once.
     The receiver must be a singleByte-String.
     TwoByte- and FourByteSymbols are (currently ?) not allowed."

    |s|


    s := self string.
    s ~~ self ifTrue:[
       ^ s asSymbolIfInterned
    ].
    ^ nil.

    "Created: 22.5.1996 / 16:37:04 / cg"
!

asText
    "return a Text-object (collection of lines) from myself."

    "this test allows for small non-gui apps to be built without libbasic2 (where Text is)"
    Text isNil ifTrue:[^ self].
    ^ Text fromString:self

    "Created: 12.5.1996 / 10:41:14 / cg"
!

asTitlecase
    "return a version of the receiver, where the first character is converted to titlecase,
     and everything else to lowercase.
     See the comment in Character on what titlecase is."

    |newStr c bitsPerCharacter
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    bitsPerCharacter := newStr bitsPerCharacter.

    1 to:mySize do:[:i |
	i == 1 ifTrue:[
	    c := (self at:i) asTitlecase.
	] ifFalse:[
	    c := (self at:i) asLowercase.
	].
	c bitsPerCharacter > bitsPerCharacter ifTrue:[
	    newStr := c stringSpecies fromString:newStr.
	].
	newStr at:i put:c
    ].
    ^ newStr

    "
     'helloWorld' asTitlecase
     'HelloWorld' asTitlecase
     'HELLOWORLD' asTitlecase
     'helloworld' asTitlecase
    "
!

asTitlecaseFirst
    "return a version of the receiver, where the first character is converted to titlecase.
     Titlecase is much like uppercase for most characters, with the exception of some combined
     (2-character glyphs), which consist of an upper- and lower-case characters.
     If the first character is already titlecase, or there is no titlecasepercase for it, return the
     receiver."

    "
     For example, in Unicode, character U+01F3 is LATIN SMALL LETTER DZ.
     (Let us write this compound character using ASCII as 'dz'.)
     This character uppercases to character U+01F1, LATIN CAPITAL LETTER DZ.
     (Which is basically 'DZ'.)
     But it titlecases to to character U+01F2, LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
     (Which we can write 'Dz'.)

      character uppercase titlecase
      --------- --------- ---------
      dz        DZ        Dz
    "

    |newString firstChar firstCharAsTitlecase|

    firstChar := (self at:1).
    firstCharAsTitlecase := firstChar asTitlecase.
    firstChar == firstCharAsTitlecase ifTrue:[ ^ self].

    firstCharAsTitlecase bitsPerCharacter > self bitsPerCharacter ifTrue:[
	newString := firstCharAsTitlecase stringSpecies fromString:self.
    ] ifFalse:[
	newString := self stringSpecies fromString:self.
    ].
    newString at:1 put:firstCharAsTitlecase.
    ^ newString

    "
     'helloWorld' asTitlecaseFirst
     'HelloWorld' asTitlecaseFirst
    "
!

asTwoByteString
    "return the receiver converted to a two-byte string.
     Will be obsolete soon."

    ^ TwoByteString fromString:self
!

asURI
    "return an URI with string taken from the receiver"

    ^ URI fromString:self
!

asURL
    "return an URL-object from myself."

    ^ URL fromString:self

    "
     'http://www.exept.de:80/index.html' asURL host
     'http://www.exept.de:80/index.html' asURL port
     'http://www.exept.de:80/index.html' asURL method
     'http://www.exept.de:80/index.html' asURL path
    "
!

asUnicode16String
    "thats not really true - characters above ascii 16r7F may need special treatment"

    ^ ((Unicode16String new:self size) replaceFrom:1 to:self size with:self startingAt:1)
!

asUnicode32String
    "thats not really true - characters above ascii 16r7F may need special treatment"

    ^ ((Unicode32String new:self size) replaceFrom:1 to:self size with:self startingAt:1)
!

asUnicodeString
    "thats not really true - characters above ascii 16r7F may need special treatment"

    ^ ((UnicodeString new:self size) replaceFrom:1 to:self size with:self startingAt:1)
!

asUppercase
    "return a copy of myself in uppercase letters"

    |newStr c bitsPerCharacter
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    bitsPerCharacter := newStr bitsPerCharacter.

    1 to:mySize do:[:i |
	c := (self at:i) asUppercase.
	c bitsPerCharacter > bitsPerCharacter ifTrue:[
	    newStr := c stringSpecies fromString:newStr.
	].
	newStr at:i put:c
    ].
    ^ newStr

    "
     'helloWorld' asUppercase
     'helloWorld' asUppercaseFirst
     (Character value:16rB5) asString asUppercase   -- needs 16 bits !!
    "
!

asUppercaseFirst
    "return a version of the receiver, where the first character is converted to uppercase.
     If the first character is already uppercase, or there is no uppercase for it, return the
     receiver."

    |newString firstChar firstCharAsUppercase|

    firstChar := (self at:1).
    firstCharAsUppercase := firstChar asUppercase.
    firstChar == firstCharAsUppercase ifTrue:[ ^ self].

    firstCharAsUppercase bitsPerCharacter > self bitsPerCharacter ifTrue:[
	newString := firstCharAsUppercase stringSpecies fromString:self.
    ] ifFalse:[
	newString := self stringSpecies fromString:self.
    ].
    newString at:1 put:firstCharAsUppercase.
    ^ newString

    "
     'helloWorld' asUppercase
     'helloWorld' asUppercaseFirst
     'HelloWorld' asUppercaseFirst
    "
!

string
    "return the receiver - for ST-80 compatibility"

    ^ self

    "Modified: 27.4.1996 / 13:29:30 / cg"
!

tokensBasedOn:aCharacter
    "this is an ST-80 alias for the ST/X method
	asCollectionOfSubstringsSeparatedBy:"

    ^ self asCollectionOfSubstringsSeparatedBy:aCharacter

    "
     'hello:world:isnt:this nice' tokensBasedOn:$:
     'foo,bar,baz' tokensBasedOn:$,
     '/etc/passwd' asFilename readStream nextLine tokensBasedOn:$:
    "
! !

!CharacterArray methodsFor:'copying'!

, aStringOrCharacter
    "redefined to allow characters and mixed strings to be appended.
     This is nonStandard, but convenient"

    |myWidth otherWidth|

    aStringOrCharacter isCharacter ifTrue:[
	^ self copyWith:aStringOrCharacter
    ].
    aStringOrCharacter isText ifTrue:[
	^ aStringOrCharacter concatenateFromString:self
    ].
    aStringOrCharacter isString ifTrue:[
	(otherWidth := aStringOrCharacter bitsPerCharacter) ~~ (myWidth := self bitsPerCharacter) ifTrue:[
	    otherWidth > myWidth ifTrue:[
		^ (aStringOrCharacter species fromString:self) , aStringOrCharacter
	    ].
	    ^ self , (self species fromString:aStringOrCharacter)
	].
    ].
    ^ super , aStringOrCharacter

    "
     'hello' , $1
     'hello' , '1'
     'hello' , (' world' asText allBold)
     'hello' , (JISEncodedString fromString:' world')
     (JISEncodedString fromString:'hello') , ' world'

     Transcript showCR:
	 (Text string:'hello' emphasis:#italic) , (Text string:' world' emphasis:#bold)
    "

    "Modified: 28.6.1997 / 00:13:17 / cg"
!

,, aString
    "concatenate with a newLine in between"

    ^ (self copyWith:Character cr) , aString

   "
     hello ,, world
     'hello' ,, 'world'
   "
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string."

    ^ self , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string."

    ^ self , string1 , string2 , string3
!

copyReplaceString:subString withString:newString
    "return a copy of the receiver, with all sequences of subString replaced
     by newString (i.e. slice in the newString in place of the oldString)."

    |tmpStream idx idx1|

    tmpStream := WriteStream on:(self class new).
    idx := 1.
    [idx ~~ 0] whileTrue:[
	idx1 := idx.
	idx := self indexOfSubCollection:subString startingAt:idx.
	idx ~~ 0 ifTrue:[
	    tmpStream nextPutAll:(self copyFrom:idx1 to:idx-1).
	    tmpStream nextPutAll:newString.
	    idx := idx + subString size
	]
    ].
    tmpStream nextPutAll:(self copyFrom:idx1).
    ^ tmpStream contents

   "
     '12345678901234567890' replString:'123' withString:'OneTwoThree'
     '12345678901234567890' replString:'123' withString:'*'
     '12345678901234567890' replString:'234' withString:'foo'

     ('a string with spaces' replChar:$  withString:' foo ')
	replString:'foo' withString:'bar'
    "

    "Modified: / 31-05-1999 / 12:33:59 / cg"
    "Created: / 12-05-2004 / 12:00:00 / cg"
!

copyWith:aCharacter
    "return a new string containing the receivers characters
     and the single new character, aCharacter.
     This is different from concatentation, which expects another string
     as argument, but equivalent to copy-and-addLast.
     The code below cares for different width characters
     (i.e. when appending a 16bit char to an 8bit string)"

    |sz newString|

    aCharacter bitsPerCharacter > self bitsPerCharacter ifTrue:[
	sz := self size.
	newString := aCharacter stringSpecies new:sz + 1.
	newString replaceFrom:1 to:sz with:self startingAt:1.
	newString at:sz+1 put:aCharacter.
	^ newString.
    ].
    ^ super copyWith:aCharacter
! !

!CharacterArray methodsFor:'displaying'!

displayOn:aGC x:x y:y from:start to:stop
    "display the receiver on a GC"

    "q&d hack"

    (self copyFrom:start to:stop) displayOn:aGC x:x y:y opaque:false

    "Modified: 12.5.1996 / 12:49:33 / cg"
!

displayOn:aGc x:x y:y opaque:opaque
    "display the receiver in a graphicsContext - this method allows
     strings to be used like DisplayObjects."

    |s|

    s := self string.
    opaque ifTrue:[
	aGc displayOpaqueString:s x:x y:y.
    ] ifFalse:[
	aGc displayString:s x:x y:y.
    ].

    "Modified: 11.5.1996 / 14:42:48 / cg"
!

displayOpaqueOn:aGC x:x y:y from:start to:stop
    "display the receiver on a GC"

    "q&d hack"

    (self copyFrom:start to:stop) displayOn:aGC x:x y:y opaque:true

    "Created: 12.5.1996 / 12:29:37 / cg"
    "Modified: 12.5.1996 / 12:49:19 / cg"
! !

!CharacterArray methodsFor:'emphasis'!

allBold
    "return a test object representing the receiver, but all boldified"

    "this test allows for small non-gui apps to be built without libbasic2 (where Text is)"
    Text isNil ifTrue:[^ self].
    ^ self asText allBold

    "
     Transcript showCR:'hello' asText allBold
     Transcript showCR:'hello' allBold
    "
!

allItalic
    "return a test object representing the receiver, but all in italic"

    "this test allows for small non-gui apps to be built without libbasic2 (where Text is)"
    Text isNil ifTrue:[^ self].
    ^ self asText allItalic

    "
     Transcript showCR:'hello' asText allItalic
     Transcript showCR:'hello' allItalic
    "
!

colorizeAllWith:aColor
    ^ self asText colorizeAllWith:aColor

    "
     Transcript showCR:('hello' colorizeAllWith:Color red)
     Transcript showCR:('world' colorizeAllWith:Color green darkened)
    "
!

emphasis
    "return the emphasis.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ RunArray new:self size withAll:nil

    "Created: 14.5.1996 / 13:58:58 / cg"
!

emphasis:emphasisCollection
    ^ self asText emphasis:emphasisCollection

    "
     Transcript showCR:('hello' emphasis:#(bold bold bold bold bold))
    "
!

emphasisAt:characterIndex
    "return the emphasis at some index.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ nil

    "Created: 11.5.1996 / 14:13:27 / cg"
!

emphasisCollection
    "return the emphasis.
     Since characterArrays do not hold any emphasis information,
     nil (no emphasis) is returned here."

    ^ RunArray new:(self size)

    "Created: 14.5.1996 / 13:58:58 / cg"
    "Modified: 14.5.1996 / 15:02:29 / cg"
!

emphasizeAllWith:emphasis
    ^ self asText emphasizeAllWith:emphasis

    "
     Transcript showCR:('hello' emphasizeAllWith:#bold)
     Transcript showCR:('hello' emphasizeAllWith:(#color -> Color red))
     Transcript showCR:('hello' emphasizeAllWith:(#color -> Color red))
    "

    "Modified: / 17.6.1998 / 12:51:44 / cg"
!

makeSelectorBoldIn:aClass
    "the receiver represents some source code for
     a method in aClass.
     Change myself to boldify the selector.
     Not yet implemented (could easily use the syntaxHighlighter
     for this ...)"

    ^ self

    "Modified: / 13.12.1999 / 21:49:11 / cg"
    "Created: / 13.12.1999 / 21:49:24 / cg"
!

withoutEmphasis:emphasisToRemove
    ^ self
! !

!CharacterArray methodsFor:'encoding & decoding'!

decodeFrom:encodingSymbol
    "given the receiver encoded as described by encodingSymbol,
     convert it into internal ST/X (unicode) encoding and return a corresponding CharacterArray."

    |myEncoding encoder|

    encodingSymbol isNil ifTrue:[^ self].
    myEncoding := self encoding.
    encodingSymbol == myEncoding ifTrue:[^ self].

    encoder := CharacterEncoder encoderToEncodeFrom:(self encoding) into:encodingSymbol.
    encoder isNil ifTrue:[^ self].
    ^ encoder decodeString:self.
!

encodeFrom:oldEncoding into:newEncoding
    ^ CharacterEncoder encodeString:self from:oldEncoding into:newEncoding
!

rot13
     "Usenet: from `rotate alphabet 13 places']
      The simple Caesar-cypher encryption that replaces each English
      letter with the one 13 places forward or back along the alphabet,
      so that 'The butler did it!!' becomes 'Gur ohgyre qvq vg!!'
      Most Usenet news reading and posting programs include a rot13 feature.
      It is used to enclose the text in a sealed wrapper that the reader must choose
      to open -- e.g., for posting things that might offend some readers, or spoilers.
      A major advantage of rot13 over rot(N) for other N is that it
      is self-inverse, so the same code can be used for encoding and decoding."

    ^ self species
	streamContents:[:aStream |
	    self do:[:char |
		aStream nextPut:char rot13 ]]

    "
     'hello world' rot13
     'hello world' rot13 rot13
    "
!

utf8Decoded
    "Interpreting myself as an UTF-8 representation, decode and return the decoded string."

    |in out is16Bit c|

    is16Bit := false.
    out := WriteStream on:(String uninitializedNew:self size).
    in := self readStream.
    [in atEnd] whileFalse:[
	c := Character utf8DecodeFrom:in.
	is16Bit ifFalse:[
	    c codePoint > 16rFF ifTrue:[
		out := WriteStream with:(UnicodeString fromString:out contents).
		is16Bit := true.
	    ].
	].
	out nextPut:c.
    ].
    ^ out contents

    "
     #[16rC8 16rA0] asString utf8Decoded
     (Character value:16r220) utf8Encoded
     (Character value:16r220) utf8Encoded utf8Decoded

     (Character value:16r800) utf8Encoded
     (Character value:16r220) utf8Encoded utf8Decoded
    "

    "test:

      |utf8Encoding original readBack|

      1 to:16rFFFF do:[:ascii |
	original := (Character value:ascii) asString.
	utf8Encoding := original utf8Encoded.
	readBack := utf8Encoding utf8Decoded.
	readBack = original ifFalse:[
	    self halt
	]
      ]
    "
!

utf8DecodedWithTwoByteCharactersReplacedBy:replacementCharacter
    "Interpreting myself as an UTF-8 representation, decode and return
     the decoded string. Suppress all 2-byte (above 16rFF) characters,
     and replace them with replacementCharacter"

    |in out c|

    out := WriteStream on:(String uninitializedNew:self size).
    in := self readStream.
    [in atEnd] whileFalse:[
	c := Character utf8DecodeFrom:in.
	c codePoint > 16rFF ifTrue:[
	    c := replacementCharacter
	].
	out nextPut:c.
    ].
    ^ out contents

    "
     (Character value:16r220) utf8Encoded
	utf8DecodedWithTwoByteCharactersReplacedBy:(Character space)
    "
!

utf8Encoded
    "Return my UTF-8 representation as a new String"

    |s|

    s := WriteStream on:(String uninitializedNew:self size).
    self utf8EncodedOn:s.
    ^ s contents
!

utf8EncodedOn:aStream
    "append my UTF-8 representation to the argument, aStream."


    self do:[:c|
	c utf8EncodedOn:aStream.
    ].
! !

!CharacterArray methodsFor:'padded copying'!

centerPaddedTo:newSize
     "return a new string consisting of the receivers characters,
     plus spaces up to length and center the receivers characters in
     the resulting string.
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged."

     ^ self centerPaddedTo:newSize with:(Character space)

    "
     'foo' centerPaddedTo:10
     123 printString centerPaddedTo:10
    "

    "Created: 25.11.1995 / 10:53:57 / cg"
!

centerPaddedTo:size with:padCharacter
    "return a new string of length size, which contains the receiver
     centered (i.e. padded on both sides).
     Characters are filled with padCharacter.
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged."

    |len s|

    len := self size.
    (len < size) ifTrue:[
	s := self species new:size withAll:padCharacter.
	s replaceFrom:(size - len) // 2  + 1 with:self.
	^ s
    ]

    "
     'foo' centerPaddedTo:11 with:$.
     'fooBar' centerPaddedTo:5 with:$.
     123 printString centerPaddedTo:10 with:$.
     (' ' , 123 printString) centerPaddedTo:10 with:$.
     (Float pi printString) centerPaddedTo:15 with:(Character space)
     (Float pi printString) centerPaddedTo:15 with:$-
     (' ' , Float pi class name) centerPaddedTo:15 with:$.
    "
!

decimalPaddedTo:size and:afterPeriod at:decimalCharacter
    "return a new string of overall length size, which contains the receiver
     aligned at the decimal-period column and afterPeriod characters to the right
     of the period. The periodCharacter is passed as arguments (allowing for US and European formats
     to be padded).
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged.
     (sounds complicated ? -> see examples below)."

    ^ self
	decimalPaddedTo:size
	and:afterPeriod
	at:decimalCharacter
	withLeft:(Character space)
	right:$0

    "
     '123' decimalPaddedTo:10 and:3 at:$.      -> '   123    '
     '123' decimalPaddedTo:10 and:3 at:$.      -> '   123.000'
     '123.' decimalPaddedTo:10 and:3 at:$.     -> '   123.000'
     '123.1' decimalPaddedTo:10 and:3 at:$.    -> '   123.100'
     '123.1' decimalPaddedTo:10 and:3 at:$.    -> '   123.1  '
     '123.123' decimalPaddedTo:10 and:3 at:$.  -> '   123.123'
    "

    "Created: 23.12.1995 / 13:11:52 / cg"
!

decimalPaddedTo:size and:afterPeriod at:decimalCharacter withLeft:leftPadChar right:rightPadChar
    "return a new string of overall length size, which contains the receiver
     aligned at the decimal-period column and afterPeriod characters to the right
     of the period.
     Characters on the left are filled with leftPadChar.
     If rightPadChar is nil, characters on the right are filled with leftPadCharacter too;
     otherwise, if missing, a decimal point is added and right characters filled with this.
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged.
     (sounds complicated ? -> see examples below)."

    |s idx n rest|

    idx := self indexOf:decimalCharacter.
    idx == 0 ifTrue:[
	"/
	"/ no decimal point found; adjust string to the left of the period column
	"/
	rightPadChar isNil ifTrue:[
	    s := self , (self species new:afterPeriod + 1 withAll:leftPadChar)
	] ifFalse:[
	    s:= self , decimalCharacter asString , (self species new:afterPeriod withAll:rightPadChar).
	].
    ] ifFalse:[

	"/ the number of after-decimalPoint characters
	n := self size - idx.
	rest := afterPeriod - n.
	rest > 0 ifTrue:[
	    s := (self species new:rest withAll:(rightPadChar ? leftPadChar)).
	] ifFalse:[
	    s := ''
	].
	s := self , s.
    ].

    ^ s leftPaddedTo:size with:leftPadChar

    "
     '123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:nil     -> '   123    '
     '123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0      -> '   123.000'
     '123.' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0     -> '   123.000'
     '123.1' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0    -> '   123.100'
     '123.1' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:nil   -> '   123.1  '
     '123.123' decimalPaddedTo:10 and:3 at:$. withLeft:(Character space) right:$0  -> '   123.123'
    "

    "Modified: 23.12.1995 / 13:08:18 / cg"
!

leftPaddedTo:size
    "return a new string of length size, which contains the receiver
     right-adjusted (i.e. padded on the left).
     Characters on the left are filled with spaces.
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged."

    ^ self leftPaddedTo:size with:(Character space)

    "
     'foo' leftPaddedTo:10
     'fooBar' leftPaddedTo:5
     123 printString leftPaddedTo:10
    "
!

paddedTo:newSize
     "return a new string consisting of the receivers characters,
     plus spaces up to length.
     If the receivers size is equal or greater than the length argument,
     the original receiver is returned unchanged."

     ^ self paddedTo:newSize with:(Character space)

    "
     'foo' paddedTo:10
     123 printString paddedTo:10
    "
! !

!CharacterArray methodsFor:'pattern matching'!

compoundMatch:aString
    "like match, but the receiver may be a compound match pattern,
     consisting of multiple simple patterns, separated by semicolons.
     This is usable with fileName pattern fields."

    ^self compoundMatch:aString ignoreCase:false

    "
     'f*' match:'foo'
     'b*' match:'foo'
     'f*;b*' match:'foo'
     'f*;b*' match:'bar'
     'f*;b*' compoundMatch:'foo'
     'f*;b*' compoundMatch:'bar'
    "

    "Modified: / 30.1.1998 / 11:40:18 / stefan"
    "Modified: / 16.12.1999 / 01:22:08 / cg"
!

compoundMatch:aString ignoreCase:ignoreCase
    "like match, but the receiver may be a compound match pattern,
     consisting of multiple simple patterns, separated by semicolons.
     This is usable with fileName pattern fields."

    |matchers|

    matchers := self asCollectionOfSubstringsSeparatedBy:$;.
    matchers do:[:aPattern |
	(aPattern match:aString ignoreCase:ignoreCase) ifTrue:[^ true].
    ].
    ^ false.

    "
     'f*' match:'foo'
     'b*' match:'foo'
     'f*;b*' match:'foo'
     'f*;b*' match:'bar'
     'f*;b*' compoundMatch:'foo'
     'f*;b*' compoundMatch:'bar'
     'f*;b*' compoundMatch:'Foo' ignoreCase:true
    "

    "Modified: / 15.4.1997 / 15:50:33 / cg"
    "Modified: / 30.1.1998 / 11:40:18 / stefan"
    "Created: / 16.12.1999 / 01:21:35 / cg"
!

findMatchString:matchString
    "like findString/indexOfSubCollection, but allowing match patterns.
     find matchstring; if found, return the index;
     if not found, return 0."

    ^ self findMatchString:matchString startingAt:1 ignoreCase:false ifAbsent:0
!

findMatchString:matchString startingAt:index
    "like findString, but allowing match patterns.
     find matchstring, starting at index. if found, return the index;
     if not found, return 0."

    ^ self findMatchString:matchString startingAt:index ignoreCase:false ifAbsent:0
!

findMatchString:matchString startingAt:index ignoreCase:ignoreCase ifAbsent:exceptionBlock
    "like findString, but allowing match patterns.
     find matchstring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     This is a q&d hack - not very efficient"

    |firstChar firstSet
     startIndex "{ Class: SmallInteger }"
     matchSize  "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     realMatchString lcChar ucChar|

    matchSize := matchString size.
    matchSize == 0 ifTrue:[^ index]. "empty string matches"

    realMatchString := matchString.
    (realMatchString endsWith:$*) ifFalse:[
	realMatchString := realMatchString , '*'.
	matchSize := matchSize + 1
    ].

    mySize := self size.
    firstChar := realMatchString at:1.
    firstChar == self class matchEscapeCharacter ifTrue:[
	firstChar := realMatchString at:2.
    ].

    firstChar asString includesMatchCharacters ifTrue:[
	index to:mySize do:[:col |
	    (realMatchString match:self from:col to:mySize ignoreCase:ignoreCase)
	    ifTrue:[^ col]
	].
	^ exceptionBlock value.
    ].

    lcChar := firstChar asLowercase.
    ucChar := firstChar asUppercase.
    (ignoreCase and:[ lcChar ~= ucChar]) ifTrue:[
	firstSet := Array with:ucChar with:lcChar.
	startIndex := self indexOfAny:firstSet startingAt:index.
    ] ifFalse:[
	startIndex := self indexOf:firstChar startingAt:index.
    ].
    [startIndex == 0] whileFalse:[
	(realMatchString match:self from:startIndex to:mySize ignoreCase:ignoreCase)
	ifTrue:[^ startIndex].
	firstSet notNil ifTrue:[
	    startIndex := self indexOfAny:firstSet startingAt:(startIndex + 1).
	] ifFalse:[
	    startIndex := self indexOf:firstChar startingAt:(startIndex + 1).
	].
    ].
    ^ exceptionBlock value

    "
     'one two three four' findMatchString:'o[nu]'
     'one two three four' findMatchString:'o[nu]' startingAt:3
     'one two three four one' findMatchString:'ONE' startingAt:3 ignoreCase:true ifAbsent:0
    "

    "Modified: 13.9.1997 / 06:31:22 / cg"
!

includesMatchString:matchString
    "like includesString, but allowing match patterns.
     find matchstring; if found, return true, otherwise return false"

    ^ (self findMatchString:matchString) ~~ 0

    "
     'hello world' includesMatchString:'h*'
     'hello world' includesMatchString:'h[aeiou]llo'
     'hello world' includesMatchString:'wor*'
     'hello world' includesMatchString:'woR*'
    "
!

match:aString
    "return true if aString matches self, where self may contain meta-match
     characters $* (to match any string) or $# (to match any character).
     or [...] to match a set of characters.
     Lower/uppercase are considered different.
     NOTICE: match-meta character interpretation is like in unix-matching,
	     NOT the ST-80 meaning."

    ^ self match:aString from:1 to:aString size ignoreCase:false

    "
     '\*f*' match:'f'
     '\*f*' match:'*f'
     '*\*f*' match:'*f'
     '*f*' match:'*f'
     '*ute*' match:'computer'
     '*uter' match:'computer'
     'uter*' match:'computer'
     '*ute*' match:''
     '[abcd]*' match:'computer'
     '[abcd]*' match:'komputer'
     '*some*compl*ern*' match:'this is some more complicated pattern match'
     '*some*compl*ern*' match:'this is another complicated pattern match'
     '*-hh' match:'anton-h'
    "

    "Modified: / 9.6.1998 / 18:50:00 / cg"
!

match:aString from:start to:stop ignoreCase:ignoreCase
    "return true if part of aString matches myself,
     where self may contain meta-match
     characters $* (to match any string) or $# (to match any character)
     or [...] to match a set of characters.
     If ignoreCase is true, lower/uppercase are considered the same.
     NOTICE: match-meta character interpretation is like in unix-matching,
	     NOT the ST-80 meaning."

    |matchScanArray|

    "
     keep the matchScanArray from the most recent match -
     avoids parsing the pattern over-and over if multiple searches
     are done with the same pattern.
    "
    (PreviousMatch notNil
    and:[PreviousMatch key = self]) ifTrue:[
	matchScanArray := PreviousMatch value
    ] ifFalse:[
	matchScanArray := self class matchScanArrayFrom:self.
	matchScanArray isNil ifTrue:[
	    'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
	    ^ self = aString
"/            ^ false
	].
	PreviousMatch := self -> matchScanArray.
    ].

    ^ self class
	matchScan:matchScanArray
	from:1 to:matchScanArray size
	with:aString
	from:start to:stop
	ignoreCase:ignoreCase

    "
     '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true
     '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true
    "

    "Modified: / 10.11.1998 / 21:43:46 / cg"
!

match:aString ignoreCase:ignoreCase
    "return true if aString matches self, where self may contain meta-match
     characters $* (to match any string) or $# (to match any character)
     or [...] to match a set of characters.
     If ignoreCase is true, lower/uppercase are considered the same.
     NOTICE: match-meta character interpretation is like in unix-matching,
	     NOT the ST-80 meaning."

    ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase

    "
     '*ute*' match:'COMPUTER' ignoreCase:true
     '*uter' match:'COMPUTER' ignoreCase:false
     '[abcd]*' match:'computer' ignoreCase:false
     '[abcd]*' match:'Computer' ignoreCase:false
     '[a-k]*' match:'komputer' ignoreCase:false
     '[a-k]*' match:'zomputer' ignoreCase:false
     '[a-k]*' match:'Komputer' ignoreCase:false
     '[a-k]*' match:'Komputer' ignoreCase:true
     '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true
     '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true

     Time millisecondsToRun:[
	Symbol allInstancesDo:[:sym |
	    '[ab]*' match:sym ignoreCase:false
	]
     ].
     Time millisecondsToRun:[
	Symbol allInstancesDo:[:sym |
	    '*at:*' match:sym ignoreCase:false
	]
     ].
    "

    "Modified: 2.4.1997 / 17:28:58 / cg"
!

matches:aPatternString
    "return true if the receiver matches aString, where aPatternString may contain meta-match
     characters $* (to match any string) or $# (to match any character).
     or [...] to match a set of characters.
     Lower/uppercase are considered different.
     NOTICE: match-meta character interpretation is like in unix-matching,
	     NOT the ST-80 meaning."

    ^ aPatternString match:self


! !

!CharacterArray methodsFor:'printing & storing'!

article
    "return an article string for the receiver."

    |firstChar|

    firstChar := (self at:1) asLowercase.
    ((firstChar isVowel and:[firstChar ~~ $u]) or:[firstChar == $x]) ifTrue:[
	^ 'an'
    ].
    ^ 'a'

    "
	'uboot' article.
	'xmas' article.
	'alarm' article.
	'baby' article.
    "
!

basicStoreString
    "return a String for storing myself"

    |s n index|

    n := self occurrencesOf:$'.
    n == 0 ifFalse:[
	s := String new:(n + 2 + self size).
	s at:1 put:$'.
	index := 2.
	self do:[:thisChar |
	    (thisChar == $') ifTrue:[
		s at:index put:thisChar.
		index := index + 1.
	    ].
	    s at:index put:thisChar.
	    index := index + 1.
	].
	s at:index put:$'.
	^ s
    ].
    ^ '''' , self , ''''
!

displayString
    "return a string to display the receiver - use storeString to have quotes around."

    ^ self storeString
!

printOn:aStream
    "print the receiver on aStream"

    aStream nextPutAll:self
!

printString
    "return a string for printing - thats myself"

    ^ self
!

printWithQuotesDoubledOn:aStream
    "put the raw storeString of myself on aStream"

    self do:[:thisChar |
	(thisChar == $') ifTrue:[aStream nextPut:thisChar].
	aStream nextPut:thisChar
    ]

    "Modified: / 15.6.1998 / 17:21:17 / cg"
    "Created: / 15.6.1998 / 17:22:13 / cg"
! !

!CharacterArray methodsFor:'public'!

isUnarySelector
    "Answer true if the receiver contains only chars in an ANSI unary method selector, false otherwise."

    ^ (self first isLetter or:[ self first = $_ ])
      and:[ self allSatisfy: [ :chr | chr isLetterOrDigit ]]

    "Modified: / 20-04-2005 / 12:20:43 / cg"
! !

!CharacterArray methodsFor:'queries'!

bitsPerCharacter
    "return the underlying strings bitsPerCharacter
     (i.e. is it a regular String or a TwoByteString)"

    |string max|

    (string := self string) ~~ self ifTrue:[
	^ string bitsPerCharacter
    ].

    max := 8.
    self do:[:eachCharacter |
	max := max max:(eachCharacter bitsPerCharacter)
    ].
    ^ max

    "
     'hello' bitsPerCharacter
     'hello' asText allBold bitsPerCharacter
    "
!

contains8BitCharacters
    "return true, if the underlying string contains 8BitCharacters (or widers)
     (i.e. if it is non-ascii)"

    |string|

    (string := self string) ~~ self ifTrue:[
        ^ string contains8BitCharacters
    ].
    ^ self contains:[:aCharacter | aCharacter codePoint > 16r7F ].

    "
     'hello' contains8BitCharacters
     'hello' asText allBold contains8BitCharacters
    "
!

encoding
    "return the strings encoding, as a symbol.
     Here, by default, we assume unicode-encoding.
     Notice, that iso8859-1 is a true subset of unicode,
     and that singleByteStrings are therefore both unicode AND
     8859-1 encoded."

    ^ #'unicode'
!

hasChangeOfEmphasis
    ^ false

    "Created: 12.5.1996 / 12:31:39 / cg"
!

hasIcon
    "for LabelAndIcon compatibility"

    ^ false
!

hasImage
    "for LabelAndIcon compatibility"

    ^ false
!

heightOn:aGC
    "return the size of the recevier in device units if displayed on aGC"

    ^ (aGC font onDevice:aGC device) heightOf:self

    "
     'hello world' heightOn:(View new)
    "

    "Created: 12.5.1996 / 20:09:29 / cg"
    "Modified: 12.5.1996 / 20:32:05 / cg"
!

isString
    "return true, if the receiver is some kind of string;
     true is returned here - redefinition of Object>>isString."

    ^ true
!

isValidSmalltalkIdentifier
    "return true, if the receivers characters make up a valid smalltalk identifier"

    |scanner tok|

    scanner := Compiler new.
    scanner source:(self readStream).
    tok := scanner nextToken.
    tok ~~ #Identifier ifTrue:[
	^ false
    ].
    scanner tokenPosition == 1 ifFalse:[^ false].
    ^ scanner sourceStream atEnd.

    "
     'foo' isValidSmalltalkIdentifier
     '1foo' isValidSmalltalkIdentifier
     '_foo' isValidSmalltalkIdentifier
     '_foo_bar_' isValidSmalltalkIdentifier
     'foo ' isValidSmalltalkIdentifier
     ' foo' isValidSmalltalkIdentifier
    "
!

leftIndent
    "if the receiver starts with spaces, return the number of spaces
     at the left - otherwise, return 0.
     If the receiver consists of spaces only, return the receivers size."

    |index "{Class: SmallInteger }"
     end   "{Class: SmallInteger }"|

    index := 1.
    end := self size.
    [index <= end] whileTrue:[
	(self at:index) isSeparator ifFalse:[^ index - 1].
	index := index + 1
    ].
    ^ end

    "
     '    hello' leftIndent
     'foo      ' leftIndent
     '         ' leftIndent
    "

    "Modified: 20.4.1996 / 19:28:43 / cg"
!

stringSpecies
    "return the underlying strings bitsPerCharacter
     (i.e. is it a regular String or a TwoByteString)"

    |string|

    (string := self string) == self ifTrue:[^ self class].
    ^ string stringSpecies

    "
     'hello' stringSpecies
     'hello' asText allBold stringSpecies
    "
!

widthFrom:startIndex to:endIndex on:aGC
    "return ths size of part of the recevier in device units if displayed on aGC"

    ^ (aGC font onDevice:aGC device) widthOf:self from:startIndex to:endIndex

    "
     'hello world' widthFrom:1 to:5 on:(View new)
     'hello' widthOn:(View new)
    "
!

widthOn:aGC
    "return ths size of the recevier in device units if displayed on aGC"

    ^ (aGC font onDevice:aGC device) widthOf:self

    "
     'hello world' widthOn:(View new)
    "

    "Created: 12.5.1996 / 20:09:29 / cg"
    "Modified: 17.4.1997 / 12:50:23 / cg"
! !

!CharacterArray methodsFor:'regular expressions'!

allRegexMatches: rxString
    "return a collection of substrings in the receiver, which match the regular expression in rxString"

    ^ rxString asRegex matchesIn: self

    "
     '1234 abcd 3456 defg' allRegexMatches:'[0-9]+'

     '[0-9]+' asRegex matchesIn:'1234 abcd 3456 defg'
    "
!

asRegex
    "Compile the receiver as a regex matcher.
     May raise RxParser>>syntaxErrorSignal or RxParser>>compilationErrorSignal.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^ Regex::RxParser preferredMatcherClass
	for: (Regex::RxParser new parse: self)
!

asRegexIgnoringCase
    "Compile the receiver as a regex matcher.
     May raise RxParser>>syntaxErrorSignal or RxParser>>compilationErrorSignal.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^ Regex::RxParser preferredMatcherClass
	    for: (Regex::RxParser new parse: self)
	    ignoreCase: true
!

copyWithRegex: rxString matchesReplacedWith: aString
    ^ rxString asRegex copy: self replacingMatchesWith: aString
!

copyWithRegex: rxString matchesTranslatedUsing: aBlock
    ^ rxString asRegex copy: self translatingMatchesUsing: aBlock
!

matchesRegex: regexString
    "Test if the receiver matches a regex.
     May raise RxParser>>regexErrorSignal or child signals.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matches: self

    "
     'hello world' matchesRegex:'h.*d'.
    "
!

matchesRegexIgnoringCase: regexString
    "Test if the receiver matches a regex.
     May raise RxParser>>regexErrorSignal or child signals.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegexIgnoringCase matches: self

    "
     'hElLo wOrld' matchesRegexIgnoringCase:'h.*d'.
    "
!

prefixMatchesRegex: regexString
    "Test if the receiver's prefix matches a regex.
     May raise RxParser class>>regexErrorSignal or child signals.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matchesPrefix: self
!

prefixMatchesRegexIgnoringCase: regexString
    "Test if the receiver's prefix matches a regex.
     May raise RxParser class>>regexErrorSignal or child signals.
     This is a part of the Regular Expression Matcher package,
	(c) 1996, 1999 Vassili Bykov.
     Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegexIgnoringCase matchesPrefix: self
!

regex: rxString matchesCollect: aBlock
    "for all substrings in the receiver which match the regular expression in rxString, evaluate aBlock,
     and collect the returned values."

    ^ rxString asRegex matchesIn: self collect: aBlock

    "
     'hello world' regex:'\w+' matchesCollect:[:each | each asUppercase ].
     '1234 hello 456 world' regex:'\d+' matchesCollect:[:each | Number readFrom:each ].
    "
!

regex:rxString matchesDo: aBlock
    "for all substrings in the receiver which match the regular expression in rxString, evaluate aBlock"

    ^ rxString asRegex matchesIn: self do: aBlock

    "
     'hello world' regex:'\w+' matchesDo:[:each | Transcript showCR:each ].
    "
! !

!CharacterArray methodsFor:'special string converting'!

chopTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters
     in the middle have been removed for a total string length
     of maxLen."

    |sz n1 n2|

    (sz := self size) > maxLen ifTrue:[
	n1 := n2 := maxLen // 2.
	maxLen odd ifTrue:[
	    n2 := n1 + 1
	].
	^ (self copyFrom:1 to:n1) , (self copyFrom:sz - n2 + 1)
    ]

    "
     '12345678901234'   chopTo:15
     '123456789012345'  chopTo:15
     '1234567890123456' chopTo:15
     'aShortString' chopTo:15
     'aVeryLongNameForAStringThatShouldBeShortened' chopTo:15
    "
!

contractAtBeginningTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters
     at the beginning have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ '...' , (self copyFrom:(sz - (maxLen - 4)))
    ]

    "
     '12345678901234' contractAtBeginningTo:15
     '123456789012345' contractAtBeginningTo:15
     '1234567890123456' contractAtBeginningTo:15
     'aShortString' contractAtBeginningTo:15
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtBeginningTo:15
    "
!

contractAtEndTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters
     at the end have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ self copyReplaceFrom:maxLen - 3
			    with:'...'
    ]

    "
     '12345678901234' contractAtEndTo:15
     '123456789012345' contractAtEndTo:15
     '1234567890123456' contractAtEndTo:15
     'aShortString' contractAtEndTo:15
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtEndTo:15
    "
!

contractTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters
     in the middle have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz "{ SmallInteger }"
     halfSize "{ SmallInteger }"|

    (sz := self size) > maxLen ifTrue:[
	halfSize := maxLen // 2.
	^ self copyReplaceFrom:halfSize - 1
			    to:sz - maxLen + halfSize + 1
			    with:'...'
    ]

    "
     '12345678901234' contractTo:15
     '123456789012345' contractTo:15
     '1234567890123456' contractTo:15
     'aShortString' contractTo:15
     'aVeryLongNameForAStringThatShouldBeShortened' contractTo:15
    "
!

expandPlaceholdersWith:argArrayOrDictionary
    "return a copy of the receiver, where all %i escapes are
     replaced by corresponding arguments' printStrings from the argArrayOrDictionary.
     I.e. 'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this') results
     in the new string 'hello world; how is this'.
     As an extension, the argument may also be a dictionary, providing
     values for symbolic keys.
     In this case, %a .. %z and %(...) are also allowed.
     (%1..%9 require a numeric key in the dictionary, however)
     To get a '%' character, use a '%%'-escape.
     To get an integer-indexed placeHolder followed by another digit,
     or an index > 9, you must use %(digit).
     See also bindWith:... for VisualAge compatibility."

    |expandedString next v key keyAsSymbol
     idx   "{ SmallInteger }"
     idx2  "{ SmallInteger }"
     start "{ SmallInteger }"
     stop  "{ SmallInteger }"|

    expandedString := self species new:0.
    stop := self size.
    start := 1.
    [start <= stop] whileTrue:[
	idx := self indexOf:$% startingAt:start.
	(idx == 0 or:[idx == stop]) ifTrue:[
	    ^ expandedString , (self copyFrom:start to:stop)
	].
	"found a %"
	expandedString := expandedString , (self copyFrom:start to:(idx - 1)).
	next := self at:(idx + 1).
	(next == $%) ifTrue:[
	    expandedString := expandedString , '%'
	] ifFalse:[
	    (next between:$1 and:$9) ifTrue:[
		v := argArrayOrDictionary at:(next digitValue) ifAbsent:nil
	    ] ifFalse:[
		next == $( ifTrue:[
		    idx2 := self indexOf:$) startingAt:idx+2.
		    key := self copyFrom:idx+2 to:idx2-1.
		    idx := idx2 - 1.
		    keyAsSymbol := key asSymbolIfInterned.
		    (keyAsSymbol notNil and:[ argArrayOrDictionary includesKey:keyAsSymbol ]) ifTrue:[
			v := argArrayOrDictionary at:keyAsSymbol
		    ] ifFalse:[
			(key conform:[:each | each isDigit]) ifTrue:[
			    key := Number readFrom:key onError:nil.
			].
			v := argArrayOrDictionary at:key ifAbsent:nil
		    ].
		] ifFalse:[
		    v := argArrayOrDictionary at:next ifAbsent:nil.
		    v isNil ifTrue:[
			(argArrayOrDictionary includesKey:next asString) ifTrue:[
			    v := argArrayOrDictionary at:next asString asSymbol ifAbsent:nil.
			] ifFalse:[
			    v := String with:$% with:next. "/ next asString.
			]
		    ].
		]
	    ].
	    v isNil
		ifTrue:[v := '']
		ifFalse:[
		    v isBlock ifTrue:[
			v := v value
		    ]].
	    expandedString := expandedString , v printString
	].
	start := idx + 2
    ].
    ^  expandedString

    "
     'hello %1' expandPlaceholdersWith:#('world')
     'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this')
     'hello %2; how is %1' expandPlaceholdersWith:#('world' 'this')
     '%1 plus %2 gives %3 ' expandPlaceholdersWith:#(4 5 9)
     '%%(1)0 gives %(1)0' expandPlaceholdersWith:#(123)
     '%%10 gives %10' expandPlaceholdersWith:#(123)
     '%%(10) gives %(10)' expandPlaceholdersWith:#(123)
    "

    "
     |dict|

     dict := Dictionary new.
     dict at:1 put:'one'.
     dict at:$a put:'AAAAA'.
     dict at:$b put:[ Time now ].
     'hello %1 %a %b' expandPlaceholdersWith:dict
    "

    "Modified: 1.7.1997 / 00:53:24 / cg"
!

withCRs
    "return a new string consisting of receivers characters
     with all \-characters replaced by cr-characters."

    ^ self copyReplaceAll:$\ with:(Character cr)

    "
     'hello\world' withCRs
    "

    "Modified: / 18.7.1998 / 22:53:02 / cg"
!

withEscapes
    "return a new string consisting of receivers characters
     with all \X-character escapes replaced by corresponding-characters.
     (similar to the way C-language Strings are converted).
     The following escapes are supported:
	\r      return character
	\n      newline character
	\b      backspace character
	\f      formfeed character
	\t      tab character
	\e      escape character
	\\      the \ character itself
	\nnn    three digit octal number defining the characters ascii value
	\other  other

     Notice, that \' is NOT a valid escape, since the general syntax of
     string constants is not affected by this method.

     Although easily implementable, this is NOT done automatically
     by the compiler (due to a lack of a language standard for this).
     However, the compiler may detect sends ot #withEscapes to string literals
     and place a modified string constant into the binary/byte-code.
     Therefore, no runtime penalty will be payed for using these escapes.
     (not in pre 2.11 versions)
    "

    |sz      "{ SmallInteger }"
     newSize "{ SmallInteger }"
     srcIdx  "{ SmallInteger }"
     dstIdx  "{ SmallInteger }"
     val     "{ SmallInteger }"
     newString next hasEmphasis e|

    "
     first, count the number of escapes, to allow preallocation
     of the new string ...
     (it is faster to scan the string twice than to reallocate it multiple
      times in a WriteStream)
    "
    sz := newSize := self size.
    srcIdx := 1.
    [(srcIdx := self indexOf:$\ startingAt:srcIdx) ~~ 0] whileTrue:[
	srcIdx == sz ifFalse:[
	    newSize := newSize - 1.
	    srcIdx := srcIdx + 1.
	    next := self at:srcIdx.
	    next == $0 ifTrue:[
		[srcIdx < sz and:[next isDigit]] whileTrue:[
		    newSize := newSize - 1. srcIdx := srcIdx + 1. next := self at:srcIdx.
		]
	    ].
	].
	srcIdx := srcIdx + 1.
    ].

    newSize == sz ifTrue:[
	^ self
    ].

    newString := self species new:newSize.

    hasEmphasis := self hasChangeOfEmphasis.

    "
     copy over, replace escapes
    "
    srcIdx := dstIdx := 1.
    [srcIdx <= sz] whileTrue:[
	next := self at:srcIdx.
	hasEmphasis ifTrue:[
	    e := self emphasisAt:srcIdx
	].
	srcIdx := srcIdx + 1.
	next == $\ ifTrue:[
	    srcIdx <= sz ifTrue:[
		next := self at:srcIdx.
		srcIdx := srcIdx + 1.
		next == $r ifTrue:[
		    next := Character return
		] ifFalse:[
		    next == $n ifTrue:[
			next := Character nl
		    ] ifFalse:[
			next == $b ifTrue:[
			    next := Character backspace
			] ifFalse:[
			    next == $f ifTrue:[
				next := Character newPage
			    ] ifFalse:[
				next == $t ifTrue:[
				    next := Character tab
				] ifFalse:[
				    next == $e ifTrue:[
					next := Character esc
				    ] ifFalse:[
					next == $0 ifTrue:[
					    val := 0.
					    [next notNil and:[next isDigit]] whileTrue:[
						val := val * 8 + next digitValue.
						srcIdx <= sz ifTrue:[
						    next := self at:srcIdx.
						    srcIdx := srcIdx + 1.
						] ifFalse:[
						    next := nil
						]
					    ].
					    next := Character value:val.
					]
				    ]
				]
			    ]
			]
		    ]
		].
	    ].
	].
	newString at:dstIdx put:next.
	hasEmphasis ifTrue:[
	    newString emphasisAt:dstIdx put:e
	].
	dstIdx := dstIdx + 1.
    ].
    ^ newString

    "
     'hello world' withEscapes
     'hello\world' withEscapes
     'hello\world\' withEscapes
     'hello world\' withEscapes
     'hello\tworld' withEscapes
     'hello\nworld\na\n\tnice\n\t\tstring' withEscapes
     'hello\tworld\n' withEscapes
     'hello\010world' withEscapes
     'hello\r\nworld' withEscapes
    "

    "Modified: 12.5.1996 / 12:53:34 / cg"
!

withMatchEscapes
    "return a copy of the receiver with all match characters escaped
     by $\ characters (to be usable as a match string).
     Return the receiver, if there are none."

    |in out c escape|

    escape := self class matchEscapeCharacter.

    in := self readStream.
    out := WriteStream on:(self species new:self size).
    [in atEnd] whileFalse:[
	c := in next.
	(c == escape or:['*[#' includes:c]) ifTrue:[
	    out nextPut:$\.
	].
	out nextPut:c.
    ].
    ^ out contents.

    "
     '*foo' withMatchEscapes
     '\*foo' withMatchEscapes
     '*foo' withMatchEscapes
     '\\*foo' withMatchEscapes
     'foo*' withMatchEscapes
     'foo\*' withMatchEscapes
     'foo\' withMatchEscapes
     'f*o*o' withMatchEscapes
    "

    "Modified: 2.4.1997 / 18:13:04 / cg"
!

withTabs
    "return a string consisting of the receivers characters
     where leading spaces are replaced by tabulator characters (assuming 8-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     Limitation: only the very first spaces are replaced
		 (i.e. if the receiver contains newLine characters,
		  no tabs are inserted after those lineBreaks)"

    |idx   "{ SmallInteger }"
     nTabs "{ SmallInteger }"
     newString|

    idx := self findFirst:[:c | (c ~~ Character space)].
    nTabs := (idx-1) // 8.
    nTabs <= 0 ifTrue:[^ self].

    "any tabs"
    newString := self class new:(self size - (nTabs * 7)).
    newString atAll:(1 to:nTabs) put:(Character tab).
    newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
    ^ newString

    "
     '12345678901234567890' withTabs
     '       8901234567890' withTabs
     '        901234567890' withTabs
     '               67890' withTabs
     '                7890' withTabs
     '                 890' withTabs
    "
!

withTabsExpanded
    "return a string consisting of the receivers characters,
     where all tabulator characters are expanded into spaces (assuming 8-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings."

    ^ self withTabsExpanded:8

    "
     ('1' , Character tab asString , 'x') withTabsExpanded
     ('12345' , Character tab asString , 'x') withTabsExpanded
     ('123456' , Character tab asString , 'x') withTabsExpanded
     ('1234567' , Character tab asString , 'x') withTabsExpanded
     ('12345678' , Character tab asString , 'x') withTabsExpanded
     ('123456789' , Character tab asString , 'x') withTabsExpanded

     (String with:Character tab
	     with:Character tab
	     with:$1) withTabsExpanded

     (String with:Character tab
	     with:$1
	     with:Character tab
	     with:$2) withTabsExpanded

     (String with:Character tab
	     with:$1
	     with:Character cr
	     with:Character tab
	     with:$2) withTabsExpanded
    "

    "Modified: 12.5.1996 / 13:05:10 / cg"
!

withTabsExpanded:numSpaces
    "return a string consisting of the receivers characters,
     where all tabulator characters are expanded into spaces (assuming numSpaces-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings."

    |col    "{ SmallInteger }"
     str ch
     dstIdx "{ SmallInteger }"
     newSz  "{ SmallInteger }"
     sz "{ SmallInteger }"
     hasEmphasis e|

    (self includes:(Character tab)) ifFalse:[^ self].

    sz := self size.

    "/ count the new size first, instead of
    "/ multiple resizing (better for large strings)

    col := 1. newSz := 0.
    1 to:sz do:[:srcIdx |
	ch := self at:srcIdx.
	ch == Character tab ifFalse:[
	    col := col + 1.
	    newSz := newSz + 1.
	    ch == Character cr ifTrue:[
		col := 1
	    ].
	] ifTrue:[
	    (col \\ numSpaces) to:numSpaces do:[:ii |
		newSz := newSz + 1.
		col := col + 1
	    ].
	]
    ].

    str := self species new:newSz.

    hasEmphasis := self hasChangeOfEmphasis.

    col := 1. dstIdx := 1.
    1 to:sz do:[:srcIdx |
	ch := self at:srcIdx.

	ch == Character tab ifFalse:[
	    col := col + 1.
	    ch == Character cr ifTrue:[
		col := 1
	    ].
	    hasEmphasis ifTrue:[
		e := self emphasisAt:srcIdx.
		str emphasisAt:dstIdx put:e
	    ].
	    str at:dstIdx put:ch.
	    dstIdx := dstIdx + 1
	] ifTrue:[
	    (col \\ numSpaces) to:numSpaces do:[:ii |
		str at:dstIdx put:Character space.
		dstIdx := dstIdx + 1.
		col := col + 1
	    ].
	]
    ].
    ^ str

    "
     ('1' , Character tab asString , 'x') withTabsExpanded
     ('1' , Character tab asString , 'x') withTabsExpanded:4
     ('12345' , Character tab asString , 'x') withTabsExpanded
     ('123456' , Character tab asString , 'x') withTabsExpanded
     ('1234567' , Character tab asString , 'x') withTabsExpanded
     ('12345678' , Character tab asString , 'x') withTabsExpanded
     ('123456789' , Character tab asString , 'x') withTabsExpanded

     (String with:Character tab
	     with:Character tab
	     with:$1) withTabsExpanded

     (String with:Character tab
	     with:$1
	     with:Character tab
	     with:$2) withTabsExpanded

     (String with:Character tab
	     with:$1
	     with:Character cr
	     with:Character tab
	     with:$2) withTabsExpanded
    "

    "Modified: 12.5.1996 / 13:05:10 / cg"
!

withoutCRs
    "return a new collection consisting of receivers elements
     with all cr-characters replaced by \-characters.
     This is the reverse operation of withCRs."

    ^ self copyReplaceAll:(Character cr) with:$\
    "
     'hello
world' withoutCRs
    "

    "Modified: / 18.7.1998 / 22:53:08 / cg"
!

withoutLeadingSeparators
    "return a copy of myself without leading separators.
     Notice: this does remove tabs, newline or any other whitespace.
     Returns an empty string, if the receiver consist only of whitespace."

    |index|

    index := self indexOfNonSeparatorStartingAt:1.
    index ~~ 0 ifTrue:[
	index == 1 ifTrue:[
	    ^ self
	].
	^ self copyFrom:index
    ].
    ^ ''

    "
     '    foo    ' withoutLeadingSeparators
     'foo    '     withoutLeadingSeparators
     '    foo'     withoutLeadingSeparators
     '       '     withoutLeadingSeparators
     'foo'         withoutLeadingSeparators
     ('  ' , Character tab asString , ' foo   ') withoutLeadingSeparators inspect
    "
!

withoutMatchEscapes
    "return a copy of the receiver with all $\ removed or
     the receiver, if there are none."

    |in out c escape|

    escape := self class matchEscapeCharacter.

    in := self readStream.
    out := self species writeStream.
    [in atEnd] whileFalse:[
	c := in next.
	c == escape ifTrue:[
	    in atEnd ifFalse:[
		c := in next.
	    ]
	].
	out nextPut:c.
    ].
    ^ out contents.

    "
     '*foo' withoutMatchEscapes
     '\*foo' withoutMatchEscapes
     '*foo' withoutMatchEscapes
     '\\*foo' withoutMatchEscapes
     'foo*' withoutMatchEscapes
     'foo\*' withoutMatchEscapes
     'foo\' withoutMatchEscapes
     'f\*o\*o' withoutMatchEscapes
    "

    "Modified: 30.6.1997 / 13:40:23 / cg"
!

withoutPrefix:aString
    "if the receiver startsWith aPrefix, return a copy without it.
     Otherwise return the receiver"

    (self startsWith:aString) ifTrue:[
	^ self copyFrom:aString size + 1
    ].
    ^ self

    "
     'helloworld' withoutPrefix:'hello'
     'helloworld' withoutPrefix:'foo'
    "
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     (but whiteSpace in-between is preserved)
     Whitespace is space, tab, newline, formfeed.
     Use withoutSpaces, if you want to remove spaces only."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }"
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
	startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSeparators
     '    foo' withoutSeparators
     'foo    ' withoutSeparators
     '       ' withoutSeparators
     ('  foo' , Character tab asString , '    ') withoutSeparators inspect
    "
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     (but spaces in-between are preserved)
     Notice: this does NOT remove tabs, newline or any other whitespace.
     Use withoutSeparators for this."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }"
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
	startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) == Character space]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSpaces
     'foo    '     withoutSpaces
     '    foo'     withoutSpaces
     '       '     withoutSpaces
     'a     b'     withoutSpaces
     ('  foo' , Character tab asString , '    ') withoutSpaces inspect
    "
!

withoutTrailingSeparators
    "return a copy of myself without trailing separators.
     Notice: this does remove tabs, newline or any other whitespace.
     Returns an empty string, if the receiver consist only of whitespace."

    |index|

    index := self size.
    [index ~~ 0] whileTrue:[
	(self at:index) isSeparator ifFalse:[
	    ^ self copyTo:index
	].
	index := index - 1
    ].
    ^ ''

    "
     '    foo    ' withoutTrailingSeparators
     'foo    '     withoutTrailingSeparators
     '    foo'     withoutTrailingSeparators
     '       '     withoutTrailingSeparators
     'foo'         withoutTrailingSeparators
     ('  ' , Character tab asString , ' foo   ') withoutTrailingSeparators inspect
     ('   foo' , Character tab asString) withoutTrailingSeparators inspect
    "
! !

!CharacterArray methodsFor:'substring searching'!

findString:subString
    "find a substring. if found, return the index;
     if not found, return 0."

    ^ self indexOfSubCollection:subString startingAt:1 ifAbsent:0

    "
     'hello world' findString:'llo'
     'hello world' findString:'ole'
    "
!

findString:subString ifAbsent:exceptionBlock
    "find a substring. If found, return the index;
     if not found, return the result of evaluating exceptionBlock."

    ^ self indexOfSubCollection:subString startingAt:1 ifAbsent:exceptionBlock
!

findString:subString startingAt:index
    "find a substring, starting at index. if found, return the index;
     if not found, return 0."

    ^ self indexOfSubCollection:subString startingAt:index ifAbsent:0

    "
     'hello yello' findString:'llo' startingAt:1
     'hello yello' findString:'llo' startingAt:5
     'hello yello' findString:'llo' startingAt:15
    "
!

findString:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock."

    ^ self indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock
!

includesString:aString
    "return true, if a substring is contained in the receiver"

    ^ (self indexOfSubCollection:aString startingAt:1 ifAbsent:0) ~~ 0

    "
     'hello world' includesString:'hel'
     'hello world' includesString:'rld'
     'hello world' includesString:'llo'
     'hello world' includesString:'LLO'
    "
!

indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     This is a q&d hack - not very efficient"

    |firstChar found
     startIndex "{ Class: SmallInteger }"
     subSize    "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     runIdx     "{ Class: SmallInteger }" |

    subSize := subString size.
    subSize == 0 ifTrue:[   "empty string matches"
	subString isString ifFalse:[
	   self error:'non string argument' mayProceed:true.
	].
	^ index
    ].
    mySize := self size.
    firstChar := subString at:1.
    startIndex := self indexOf:firstChar startingAt:index.
    [startIndex == 0] whileFalse:[
	runIdx := startIndex.
	found := true.
	1 to:subSize do:[:i |
	    runIdx > mySize ifTrue:[
		found := false
	    ] ifFalse:[
		(subString at:i) ~= (self at:runIdx) ifTrue:[
		    found := false
		]
	    ].
	    runIdx := runIdx + 1
	].
	found ifTrue:[
	    ^ startIndex
	].
	startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
    ].
    ^ exceptionBlock value

    "Modified: 23.2.1996 / 15:35:15 / cg"
!

restAfter:keyword withoutSeparators:strip
    "compare the left of the receiver with keyword,
     if it matches return the right.
     Finally, if strip is true, remove whiteSpace.
     This method is used to match and extract lines of the form:
	something: rest
     where we are interested in rest, but only if the receiver string
     begins with something.

     You may wonder why such a specialized method exists here
     - this is so common when processing mailboxes,
     rcs files, nntp/pop3 responses, that is was considered worth
     a special method here to avoid having the code below a hundred
     times in variuos places."

    |rest|

    (self startsWith:keyword) ifTrue:[
	rest := self copyFrom:(keyword size + 1).
	strip ifTrue:[
	    rest := rest withoutSeparators
	].
	^ rest
    ].
    ^ nil

    "
     'foo: hello world' restAfter:'foo:' withoutSeparators:true
     'funny: something' restAfter:'foo:' withoutSeparators:true

     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:true
     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:false
    "

    "Created: 25.11.1995 / 11:04:18 / cg"
! !

!CharacterArray methodsFor:'testing'!

continuesWith:aString startingAt:startIndex
    "return true, if the receiver beginning at startIndex
     contains the characters in aString."

    |sz  "{Class: SmallInteger }"
     idx "{Class: SmallInteger }"|

    sz := aString size.
    idx := startIndex.

    1 to:sz do:[:i |
	(self at:idx) ~~ (aString at:i) ifTrue:[^ false].
	idx := idx + 1
    ].
    ^ true

    "
     'hello world' continuesWith:'world' startingAt:6
     'hello world' continuesWith:'world' startingAt:7
    "

    "Created: 12.5.1996 / 15:46:40 / cg"
    "Modified: 26.7.1996 / 19:08:36 / cg"
!

countWords
    "return the number of words, which are separated by separators"

    |tally "{ Class: SmallInteger }"
     start "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"
     stop ch|

    tally := 0.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	ch := self at:start.
	ch isSeparator ifTrue:[
	    start := start + 1
	] ifFalse:[
	    stop := self indexOfSeparatorStartingAt:start.
	    (stop == 0) ifTrue:[
		stop := mySize + 1
	    ].
	    tally := tally + 1.
	    start := stop
	]
    ].
    ^ tally

    "
     'hello world isnt this nice' countWords'
    "
!

endsWith:aStringOrCharacter
    "return true, if the receiver ends with something, aStringOrCharacter."

    |s|

    (s := self string) ~~ self ifTrue:[
	^ s endsWith:aStringOrCharacter
    ].
    aStringOrCharacter isCharacter ifTrue:[
	^ self last = aStringOrCharacter
    ].
    ^ super endsWith:aStringOrCharacter

    "
     'hello world' endsWith:'world'
     'hello world' asText allBold endsWith:'world'
    "

    "Modified: 12.5.1996 / 15:49:18 / cg"
!

isAlphaNumeric
    "return true, if the receiver is some alphanumeric word;
     i.e. consists of a letter followed by letters or digits."

    self size == 0 ifTrue:[
	"mhmh what is this ?"
	^ false
    ].
    (self at:1) isLetter ifFalse:[^ false].
    self do:[:char |
	char isLetterOrDigit ifFalse:[^ false].
    ].
    ^ true

    "
     'helloWorld' isAlphaNumeric
     'foo1234' isAlphaNumeric
     'f1234' isAlphaNumeric
     '1234' isAlphaNumeric
     '+' isAlphaNumeric
    "
!

isBinarySelector
    "treating the receiver as a message selector, return true if its a binary selector"

    |binopChars|

    (self size > 3) ifTrue:[^ false].

    binopChars := Scanner binarySelectorCharacters.
    ^ (self conform:[:char | (binopChars includes:char)])

    "
     'foo:bar:' isBinarySelector
     #foo:bar: isBinarySelector
     'hello' isBinarySelector
     '+' isBinarySelector
     '|' isBinarySelector
     '?' isBinarySelector
     ':' isBinarySelector
     'a:' isBinarySelector
     '->' isBinarySelector
     '<->' isBinarySelector
     '::' isBinarySelector
    "

    "Modified: 4.1.1997 / 14:16:14 / cg"
!

isBlank
    "return true, if the receiver contains spaces only"

    self do:[:char |
	char ~~ Character space ifTrue:[^ false].
    ].
    ^ true
!

isNumeric
    "return true, if the receiver is some numeric word;
     i.e. consists only of digits."

    self size == 0 ifTrue:[
	^ false
    ].
    self do:[:char |
	char isDigit ifFalse:[^ false].
    ].
    ^ true

    "
     'helloWorld' isNumeric
     'foo1234' isNumeric
     'f1234' isNumeric
     '1234' isNumeric
     '+' isNumeric
    "
!

levenshteinTo:aString
    "return the levenshtein distance to the argument, aString;
     this value corresponds to the number of replacements that have to be
     made to get aString from the receiver.
     See IEEE transactions on Computers 1976 Pg 172 ff."

    "
     in the following, we assume that ommiting a character
     is less of an error than inserting an extra character.
     Therefore the different insertion (i) and deletion (d) values.
     s: substitution weight
     k: keyboard weight (typing a nearby key)
     c: case weight
     i: insertion of extra character weight
     d: delete of a character weight
    "

    ^ self levenshteinTo:aString s:4 k:2 c:1 i:2 d:6

    "
     'computer' levenshteinTo:'computer'
     'cOmputer' levenshteinTo:'computer'
     'cOmpuTer' levenshteinTo:'computer'
     'cimputer' levenshteinTo:'computer'
     'cumputer' levenshteinTo:'computer'

     'cmputer' levenshteinTo:'computer'
     'coomputer' levenshteinTo:'computer'

     'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'computer' levenshteinTo:'cmputer'
     'Computer' levenshteinTo:'computer'
    "
!

levenshteinTo:aString s:substWeight k:kbdTypoWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein.
     return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     The arguments are the costs for
	s:substitution,
	k:keyboard type (substitution),
	c:case-change,
	i:insertion
	d:deletion
     of a character.
     See IEEE transactions on Computers 1976 Pg 172 ff"

    |d  "delta matrix"
     len1 "{ Class: SmallInteger }"
     len2 "{ Class: SmallInteger }"
     dim  "{ Class: SmallInteger }"
     prevRow row col
     dimPlus1 "{ Class: SmallInteger }"
     min pp c1 c2|

    len1 := self size.
    len2 := aString size.

    "create the help-matrix"

    dim := len1 max:len2.
    dimPlus1 := dim + 1.

    d := Array new:dimPlus1.
    1 to:dimPlus1 do:[:i |
	d at:i put:(Array new:dimPlus1)
    ].

    "init help-matrix"

    (d at:1) at:1 put:0.
    row := d at:1.
    1 to:dim do:[:j |
	row at:(j + 1) put:( (row at:j) + insrtWeight )
    ].

    1 to:dim do:[:i |
	 (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
    ].

    1 to:len1 do:[:i |
	c1 := self at:i.
	1 to:len2 do:[:j |
	    c2 := aString at:j.
	    (c1 == c2) ifTrue:[
		pp := 0
	    ] ifFalse:[
		(c1 asLowercase == c2 asLowercase) ifTrue:[
		    pp := caseWeight
		] ifFalse:[
		    pp := substWeight.
		    substWeight ~~ kbdTypoWeight ifTrue:[
			(DoWhatIMeanSupport isKey:c1 asLowercase nextTo:c2 asLowercase) ifTrue:[
			    pp := kbdTypoWeight.
			].
		    ].
		]
	    ].
	    prevRow := d at:i.
	    row := d at:(i + 1).
	    col := j + 1.
	    min := (prevRow at:j) + pp.
	    min := min min:( (row at:j) + insrtWeight).
	    min := min min:( (prevRow at:col) + deleteWeight).
	    row at:col put: min
	]
    ].

    ^ (d at:(len1 + 1)) at:(len2 + 1)
!

numArgs
    "treating the receiver as a message selector, return how many arguments would it take"

    |binopChars firstChar|

    (self size > 2) ifFalse:[
	binopChars := Scanner binarySelectorCharacters.
	firstChar := self at:1.

	(self size == 1) ifTrue:[
	    (binopChars includes:firstChar) ifFalse:[^ 0].
	    ^ 1
	].
	(binopChars includes:firstChar) ifTrue:[
	    (binopChars includes:(self at:2)) ifTrue:[^ 1]
	]
    ].
    ^ self occurrencesOf:$:

    "
     'foo:bar:' numArgs
     #foo:bar: numArgs
     'hello' numArgs
     '+' numArgs
     '|' numArgs
     '?' numArgs
    "

    "Modified: 4.1.1997 / 14:16:14 / cg"
!

partsIfSelector
    "treat the receiver as a message selector, return a collection of parts."

    |idx1 "{ Class: SmallInteger }"
     coll idx2 sz|

    coll := OrderedCollection new.
    idx1 := 1.
    sz := self size.
    [true] whileTrue:[
	idx2 := self indexOf:$: startingAt:idx1 + 1.
	(idx2 == 0 or:[idx2 == sz]) ifTrue:[
	    coll add:(self copyFrom:idx1).
	    ^ coll
	].
	coll add:(self copyFrom:idx1 to:idx2).
	idx1 := idx2 + 1
    ].

    "
     'foo:bar:' partsIfSelector
     #foo:bar: partsIfSelector
     'hello' partsIfSelector
     '+' partsIfSelector
    "
!

spellAgainst: aString
    "return an integer between 0 and 100 indicating how similar
     the argument is to the receiver.  No case conversion is done.
     This algorithm is much simpler (but also less exact) than the
     levenshtein distance. Experiment which is better for your
     application."

    | i1     "{ Class: SmallInteger }"
      i2     "{ Class: SmallInteger }"
      next1  "{ Class: SmallInteger }"
      next2  "{ Class: SmallInteger }"
      size1  "{ Class: SmallInteger }"
      size2  "{ Class: SmallInteger }"
      score  "{ Class: SmallInteger }"
      maxLen "{ Class: SmallInteger }" |

    size1 := self size.
    size2 := aString size.
    maxLen := size1 max:size2.
    score := 0.
    i1 := i2 := 1.
    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
	next1 := i1 + 1.
	next2 := i2 + 1.
	(self at:i1) == (aString at:i2) ifTrue: [
	    score := score+1.
	    i1 := next1.
	    i2 := next2
	] ifFalse: [
	    (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [
		i2 := next2
	    ] ifFalse: [
		(i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [
		    i1 := next1
		] ifFalse: [
		    i1 := next1.
		    i2 := next2
		]
	    ]
	]
    ].

    score == maxLen ifTrue: [^ 100].
    ^ 100 * score // maxLen

    "
     'Smalltalk' spellAgainst: 'Smalltlak'
     'Smalltalk' spellAgainst: 'smalltlak'
     'Smalltalk' spellAgainst: 'smalltalk'
     'Smalltalk' spellAgainst: 'smalltlk'
     'Smalltalk' spellAgainst: 'Smalltolk'
    "
!

startsWith:aString
    "return true, if the receiver starts with something, aString.
     If the argument is empty, true is returned."

    |s|

    (s := self string) ~~ self ifTrue:[
	^ s startsWith:aString
    ].
    ^ super startsWith:aString

    "
     'hello world' startsWith:'hello'
     'hello world' asText allBold startsWith:'hello'
     'hello world' asText allBold startsWith:''
    "

    "Created: 12.5.1996 / 15:46:40 / cg"
    "Modified: 12.5.1996 / 15:49:24 / cg"
! !

!CharacterArray methodsFor:'tracing'!

traceInto:aRequestor level:level from:referrer
    "double dispatch into tracer, passing my type implicitely in the selector"

    ^ aRequestor traceCharacterArray:self level:level from:referrer


! !

!CharacterArray methodsFor:'visiting'!

acceptVisitor:aVisitor with:aParameter

    ^ aVisitor visitString:self with:aParameter
! !

!CharacterArray class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.320 2005-07-08 17:15:00 cg Exp $'
! !

CharacterArray initialize!