CharArray.st
author Stefan Vogel <sv@exept.de>
Thu, 14 Dec 1995 23:42:02 +0100
changeset 757 93d5f6b86e98
parent 735 362ce9e28d89
child 759 908363ce8a32
permissions -rw-r--r--
Add SemaphoreSet.

"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"CharacterArray - abstract superclass of the string classes. It is declared
 as a subclass of ByteArray and adds no instance variables of its own.
 NOTE(review): the class variable PreviousMatch is not referenced in the
 visible part of this file - presumably a cache used by pattern matching;
 confirm before relying on it."
ByteArray subclass:#CharacterArray
	instanceVariableNames:''
	classVariableNames:'PreviousMatch'
	poolDictionaries:''
	category:'Collections-Text'
!

!CharacterArray class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    CharacterArray is a superclass for all kinds of Strings (i.e.
    (singleByte-)Strings, TwoByteStrings and whatever comes in the future).

    This class is abstract, meaning that there are no instances of it. 
    All this class does is provide common protocol for concrete subclasses.
"
! !

!CharacterArray class methodsFor:'instance creation'!

basicNew
    "return a new empty string, i.e. an instance of the receiver
     class with zero elements (same as basicNew:0)"

    ^ self basicNew:0
!

fromString:aString
    "return a new instance of the receiver class, which has the same
     size as aString and into which the elements of aString have been copied"

    |newString|

    newString := self basicNew:aString size.
    ^ newString replaceFrom:1 with:aString

    "TwoByteString fromString:'hello'"
!

fromStringCollection:aCollectionOfStrings
    "return a new string formed by concatenating the elements of
     aCollectionOfStrings, with nothing in between them"

    ^ self fromStringCollection:aCollectionOfStrings separatedBy:''

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this')
    "

    "Created: 20.11.1995 / 15:26:59 / cg"
!

fromStringCollection:aCollectionOfStrings separatedBy:aSeparatorString
    "return a new string formed by concatenating the elements of
     aCollectionOfStrings; aSeparatorString is inserted between each
     two consecutive elements (but neither before the first nor after
     the last one). The result is built up starting with an empty 
     string literal."

    |result isFirst|

    result := ''.
    isFirst := true.
    aCollectionOfStrings do:[:each | 
	isFirst ifTrue:[
	    "nothing to separate from yet"
	    isFirst := false
	] ifFalse:[
	    result := result , aSeparatorString
	].
	result := result , each
    ].
    ^ result

    "
     String fromStringCollection:#('hello' 'world' 'how' 'about' 'this') separatedBy:' '
    "

    "Created: 20.11.1995 / 15:32:17 / cg"
!

new
    "return a new empty string, i.e. an instance of the receiver
     class with zero elements (same as basicNew:0)"

    ^ self basicNew:0
! !

!CharacterArray class methodsFor:'code tables'!

decoderTableFor:encoding
    "this is an experimental interface - unfinished.

     Return a 256-element ByteArray, which maps character codes of the
     given encoding (a symbol; currently only #mac and #postscript have
     entries) to iso8859 codes. The translation for code c is found at 
     index (c + 1). Codes for which no translation is known - and all codes
     of any other encoding - are mapped to themselves."

    |table|

    "/ start with the identity mapping; only the exceptions are patched in
    table := (0 to:255) asByteArray.
    encoding == #mac ifTrue:[
	table at:8r232+1 put:246. "/ german umlaut o (mac: 154; 8859: 246)
	table at:8r212+1 put:228. "/ german umlaut a (mac: 138; 8859: 228)
	table at:8r237+1 put:252. "/ german umlaut u (mac: 159; 8859: 252)
	table at:8r206+1 put:220. "/ german umlaut U (mac: 134; 8859: 220)
	"/ more needed here - need info to do it ....
    ].
    encoding == #postscript ifTrue:[
	table at:8r224+1 put:246. "/ german umlaut o (ps: 148; 8859: 246)
	table at:8r204+1 put:228. "/ german umlaut a (ps: 132; 8859: 228)
	table at:8r201+1 put:252. "/ german umlaut u (ps: 129; 8859: 252)
	table at:8r231+1 put:214. "/ german umlaut O (ps: 153; 8859: 214)
	table at:8r216+1 put:196. "/ german umlaut A (ps: 142; 8859: 196)
	table at:8r232+1 put:220. "/ german umlaut U (ps: 154; 8859: 220)
	"/ more needed here - need info to do it ....
    ].
    "/ more encodings needed here ....

    "/ without this explicit return, the method answered the receiver
    "/ (i.e. the class) instead of the table
    ^ table

    "
     (String decoderTableFor:#mac) at:8r232+1
     (String decoderTableFor:#postscript) at:8r224+1
     (String decoderTableFor:#foo) at:1
    "

    "Created: 20.10.1995 / 23:04:43 / cg"
! !

!CharacterArray class methodsFor:'pattern matching'!

matchScan:matchScanArray from:matchStart to:matchStop with:aString from:start to:stop ignoreCase:ignoreCase
    "helper for match; return true if the characters from start to stop in
     aString are matching the scan in matchScanArray from matchStart to matchStop.
     The elements of matchScanArray (as created by matchScanArrayFrom:) are:
	#anyString      matches any sequence of characters (also the empty one)
	#any            matches exactly one arbitrary character
	a string        matches one character, which must be included in it
	a character     matches itself (or, if ignoreCase is true, the same
			letter in the other case)

     The match works on a window into the pattern (mStart..mStop) and
     a window into the string (sStart..sStop), which are both shrunk from the
     left as elements are consumed. For #anyString, the windows are shrunk
     from the right as long as the last pattern element can be checked 
     directly; otherwise, the remaining pattern is tried recursively against 
     every possible rest of the string window.

     This algorithm is not at all the most efficient; 
     for heavy duty pattern matching, an interface (primitive) to the regex 
     pattern matching package should be added."

    |matchEntry 
     mStart "{ Class: SmallInteger }"
     mStop  "{ Class: SmallInteger }"
     sStart "{ Class: SmallInteger }"
     sStop  "{ Class: SmallInteger }"
     mSize  "{ Class: SmallInteger }"
     sSize  "{ Class: SmallInteger }"
     index  "{ Class: SmallInteger }"
     quickCheck matchLast
     checkChar included|

    mStart := matchStart.
    mStop := matchStop.
    sStart := start.
    sStop := stop.

    [true] whileTrue:[
	mSize := mStop - mStart + 1.
	sSize := sStop - sStart + 1.

	"empty strings match"
	(mSize == 0) ifTrue:[^ (sSize == 0)].

	matchEntry := matchScanArray at:mStart.

	"/ the most common case first:
	"/ (notice, that checkChar is only valid below if sSize is not zero;
	"/  all branches which use it check for that first)
	(sSize ~~ 0 
	and:[(checkChar := (aString at:sStart)) == matchEntry]) ifTrue:[
	    "advance by one and continue"
	    mStart := mStart + 1.
	    sStart := sStart + 1
	] ifFalse:[
	    (matchEntry == #any) ifTrue:[
		"restString empty -> no match"
		(sSize == 0) ifTrue:[^ false].
		"# matches single character"
		((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
		"advance by one and continue"
		mStart := mStart + 1.
		sStart := sStart + 1
	    ] ifFalse:[
		(matchEntry == #anyString) ifTrue:[
		    "* alone matches anything"
		    (mSize == 1) ifTrue:[^ true].
		    "restString empty & matchString not empty -> no match"
		    (sSize == 0) ifTrue:[^ false].

		    "
		     try to avoid some of the recursion by checking last
		     character and continue with shortened strings if possible
		    "
		    quickCheck := false.
		    (mStop >= mStart) ifTrue:[
			matchLast := matchScanArray at:mStop.
			(matchLast ~~ #anyString) ifTrue:[
			    (matchLast == #any) ifTrue:[
				quickCheck := true
			    ] ifFalse:[
				matchLast == (aString at:sStop) ifTrue:[
				    quickCheck := true
				] ifFalse:[
				    matchLast isString ifTrue:[
					quickCheck := matchLast includes:(aString at:sStop)
				    ]
				]
			    ]
			]
		    ].
		    quickCheck ifFalse:[
			"
			 no quick check possible;
			 loop over all possible substrings.
			 The recursion must be limited to the current string 
			 window (sStop) - not to the original stop; otherwise, 
			 characters which have already been consumed from the 
			 right by a previous quick check would be matched a 
			 second time.
			"
			index := sStart.
			[index <= sStop] whileTrue:[
			    (self matchScan:matchScanArray 
				  from:(mStart + 1) 
				  to:mStop 
				  with:aString 
				  from:index 
				  to:sStop 
				  ignoreCase:ignoreCase) ifTrue:[
				^ true
			    ].
			    index := index + 1
			].
			^ false
		    ].
		    "
		     quickCheck ok, advance from the right
		    "
		    mStop := mStop - 1.
		    sStop := sStop - 1
		] ifFalse:[
		    (matchEntry isString) ifTrue:[
			"testString empty -> no match"
			(sSize == 0) ifTrue:[^ false].

			"/ checkChar has already been fetched above
			included := matchEntry includes:checkChar.
			included ifFalse:[
			    ignoreCase ifTrue:[
				checkChar isUppercase ifTrue:[
				    included := matchEntry includes:checkChar asLowercase.
				] ifFalse:[
				    included := matchEntry includes:checkChar asUppercase.
				]
			    ].
			].
			mStart := mStart + 1.
			mSize := mSize - 1.
			included ifFalse:[^ false].

			((sSize == 1) and:[mSize == 0]) ifTrue:[^ true].
			"cut off 1st char and continue"
			sStart := sStart + 1
		    ] ifFalse:[
			"/ must be single character

			"testString empty ?"
			(sSize == 0) ifTrue:[^ false].

			"first characters equal ?"
			"/ checkChar has already been fetched above and is known
			"/ to be not identical to matchEntry; therefore, only a 
			"/ caseless compare can still succeed
			ignoreCase ifFalse:[^ false].
			(checkChar asUppercase ~~ matchEntry asUppercase) ifTrue:[^ false].

			"advance and continue"
			mStart := mStart + 1.
			sStart := sStart + 1
		    ]
		]
	    ]
	]
    ]

    "
     String matchScan:#(#anyString $a #any #anyString $b) from:1 to:5 with:'ab' from:1 to:2 ignoreCase:false
     String matchScan:#(#anyString $a #any #anyString $b) from:1 to:5 with:'axb' from:1 to:3 ignoreCase:false
    "
!

matchScanArrayFrom:aString
    "scan a pattern string and decompose it into a scanArray.
     This is processed faster (especially with character ranges), and
     can also be reused later. (if the same pattern is to be searched again)

     The returned array contains one element per pattern item:
	*               -> the symbol #anyString (runs of * are collapsed into one)
	#               -> the symbol #any
	[...]           -> a string containing all characters of the set;
			   ranges such as a-z are expanded
	other character -> the character itself
     Nil is returned, if a character set is not closed by a bracket."

    |coll 
     idx "{ Class: SmallInteger }"
     end c1 c2 matchSet previous|

    coll := OrderedCollection new.
    idx := 1. end := aString size.
    [idx <= end] whileTrue:[
	|char this|

	"/ do not depend on block locals being cleared in every round
	this := nil.

	char := aString at:idx.
	char == $* ifTrue:[
	    "/ a second star in a row adds nothing
	    previous ~~ #anyString ifTrue:[
		this := #anyString
	    ]
	] ifFalse:[
	    char == $# ifTrue:[
		"/ must be kept even after a star, since it demands one more
		"/ character; dropping it made the star-hash pattern match
		"/ the empty string
		this := #any
	    ] ifFalse:[
		char == $[ ifTrue:[
		    matchSet := IdentitySet new.
		    idx := idx + 1.
		    idx > end ifTrue:[^ nil].
		    char := aString at:idx.
		    "/ c1 remembers a possible start of a range
		    c1 := nil.
		    [char ~~ $]] whileTrue:[
			((char == $-) and:[c1 notNil]) ifTrue:[
			    "/ a range - fetch its end and add all in between
			    idx := idx + 1.
			    idx > end ifTrue:[^ nil].
			    c2 := aString at:idx.
			    (c1 to:c2) do:[:c | matchSet add:c].
			    c1 := nil.
			    idx := idx + 1.
			] ifFalse:[
			    matchSet add:char.
			    c1 := char.
			    idx := idx + 1
			].
			idx > end ifTrue:[^ nil].
			char := aString at:idx
		    ].
		    this := matchSet asString
		] ifFalse:[
		    this := char
		]
	    ]
	].
	this notNil ifTrue:[coll add:this. previous := this].
	idx := idx + 1
    ].

    ^ coll asArray

    "
     String matchScanArrayFrom:'*ute*'  
     String matchScanArrayFrom:'**ute**'  
     String matchScanArrayFrom:'*uter'   
     String matchScanArrayFrom:'*#'   
     String matchScanArrayFrom:'[cC]#mpute[rR]'  
     String matchScanArrayFrom:'[abcd]*'      
     String matchScanArrayFrom:'[a-k]*'      
     String matchScanArrayFrom:'*some*compl*ern*' 
     String matchScanArrayFrom:'[a-'  
     String matchScanArrayFrom:'[a-zA-Z]'  
     String matchScanArrayFrom:'[a-z01234A-Z]'  
    "
! !

!CharacterArray methodsFor:'Compatibility - ST/V'!

byteAt:index put:aByte
    "store a byte at given index. 
     A zero byte is stored as a space character; any other value is 
     stored as the character with that value (Character value:aByte).
     This is an ST/V compatibility method."

    (aByte == 0) ifTrue:[
	"store a space instead"
	"NOTE(review): this branch uses basicAt:put:, while the one below uses
	 at:put: - confirm that the difference is intended"
	^ super basicAt:index put:(Character space)
    ].
    ^ super at:index put:(Character value:aByte)
!

replChar:oldChar with:newChar
    "return a copy of the receiver, with all oldChars replaced
     by newChar. The receiver itself is not changed, since the
     replace is done in a copy.
     This is an ST/V compatibility method."

    ^ self copy replaceAll:oldChar by:newChar

    "
     '12345678901234567890' replChar:$0 with:$* 
    "
!

replChar:oldChar withString:newString
    "return a new string, which consists of the receivers characters,
     except that every occurrence of oldChar has been replaced by the
     complete newString. The result is collected in a stream on a new
     instance of the receivers class.
     This is an ST/V compatibility method."

    |out|

    out := WriteStream on:(self class new).
    self do:[:ch |
	ch = oldChar 
	    ifTrue:[out nextPutAll:newString]
	    ifFalse:[out nextPut:ch]
    ].
    ^ out contents

    "
     '12345678901234567890' replChar:$0 withString:'foo' 
     'a string with spaces' replChar:$  withString:' foo '  
    "
!

trimBlanks
    "return a copy of the receiver without leading
     and trailing spaces; simply forwards to withoutSpaces.
     This is an ST/V compatibility method."

    ^ self withoutSpaces

    "
     '    spaces at beginning' trimBlanks     
     'spaces at end    ' trimBlanks           
     '    spaces at beginning and end     ' trimBlanks    
     'no spaces' trimBlanks              
    "
! !

!CharacterArray methodsFor:'Compatibility - VisualAge'!

addLineDelimiter
    "replace all '\'-characters by line delimiter (cr) - characters;
     simply forwards to withCRs and returns its result.
     This has been added for VisualAge compatibility."

    ^ self withCRs
!

bindWith:aString
    "return a copy of the receiver, where a '%1' escape is
     replaced by aString. The expansion itself is done by
     expandPlaceholdersWith:, which gets a one-element array.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:aString)

    "
     'do you like %1 ?' bindWith:'smalltalk'
    "
!

bindWith:string1 with:string2
    "return a copy of the receiver, where a '%1' escape is
     replaced by string1 and '%2' is replaced by string2.
     The expansion itself is done by expandPlaceholdersWith:, 
     which gets a two-element array.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:string1 with:string2)

    "
     'do you prefer %1 or rather %2 ?'
	bindWith:'smalltalk' with:'c++'
    "
!

bindWith:str1 with:str2 with:str3
    "return a copy of the receiver, where the '%1', '%2' and '%3' escapes
     are replaced by str1, str2 and str3 respectively.
     The expansion itself is done by expandPlaceholdersWith:, 
     which gets a three-element array.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3)

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWith:'smalltalk' with:'c++' with:'c'
    "
!

bindWith:str1 with:str2 with:str3 with:str4
    "return a copy of the receiver, where the '%1', '%2', '%3' and '%4' escapes
     are replaced by str1, str2, str3 and str4 respectively.
     The expansion itself is done by expandPlaceholdersWith:, 
     which gets a four-element array.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:(Array with:str1 with:str2 with:str3 with:str4)

    "
     'do you prefer %1 or rather %2 (not talking about %3 or even %4) ?'
	bindWith:'smalltalk' with:'c++' with:'c' with:'assembler'
    "
!

bindWithArguments:anArrayOfStrings
    "return a copy of the receiver, where a '%i' escape
     is replaced by the corresponding string from the argument array.
     'i' may be between 1 and 9 (i.e. a maximum of 9 placeholders is allowed).
     The argument is passed unchanged to expandPlaceholdersWith:.
     This has been added for VisualAge compatibility."

    ^ self expandPlaceholdersWith:anArrayOfStrings

    "
     'do you prefer %1 or rather %2 (not talking about %3) ?'
	bindWithArguments:#('smalltalk' 'c++' 'c')
    "
!

subStrings
    "return a collection consisting of all words contained in the receiver.
     Words are separated by whitespace.
     The result is whatever asCollectionOfWords returns.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfWords

    "
     'hello world, this is smalltalk' subStrings
    "
!

subStrings:separatorCharacter
    "return a collection consisting of all words contained in the receiver.
     Words are separated by separatorCharacter.
     The result is whatever asCollectionOfSubstringsSeparatedBy: returns;
     therefore, it contains empty strings, if separatorCharacter occurs 
     multiple times in a row.
     This has been added for VisualAge compatibility."

    ^ self asCollectionOfSubstringsSeparatedBy:separatorCharacter

    "
     'foo:bar:baz:smalltalk' subStrings:$:
    "
!

trimSeparators
    "return a copy of the receiver without leading and trailing whiteSpace;
     simply forwards to withoutSeparators.
     (found in the VisualAge compatibility protocol)"

    ^ self withoutSeparators
! !

!CharacterArray methodsFor:'character searching'!

includesMatchCharacters
    "return true if the receiver includes any meta characters 
     (i.e. $*, $# or the opening bracket of a character set) 
     for match operations; false if not"

    ^ self includesAny:'*#['
!

indexOfNonSeparatorStartingAt:startIndex
    "search forward, beginning at startIndex, for a character which is
     not a separator (whitespace) and return its index. 
     Return 0, if there is none up to the end of the receiver."

    |idx  "{ Class: SmallInteger }"
     last "{ Class: SmallInteger }"|

    idx := startIndex.
    last := self size.

    [idx <= last] whileTrue:[
	(self at:idx) isSeparator ifFalse:[^ idx].
	idx := idx + 1
    ].
    ^ 0

    "
     '    hello world' indexOfNonSeparatorStartingAt:1 
    "
    "
     |s index1 index2|
     s := '   foo    bar      baz'.
     index1 := s indexOfNonSeparatorStartingAt:1.
     index2 := s indexOfSeparatorStartingAt:index1.
     s copyFrom:index1 to:index2 - 1
    "
!

indexOfSeparator
    "return the index of the first whitespace character;
     the search is done by indexOfSeparatorStartingAt:, beginning 
     with the first character (which returns 0, if there is none)"

    ^ self indexOfSeparatorStartingAt:1

    "'hello world' indexOfSeparator"
!

indexOfSeparatorStartingAt:startIndex
    "search forward, beginning at startIndex, for a separator 
     (whitespace) character and return its index. 
     Return 0, if there is none up to the end of the receiver."

    |idx  "{ Class: SmallInteger }"
     last "{ Class: SmallInteger }"|

    idx := startIndex.
    last := self size.

    [idx <= last] whileTrue:[
	(self at:idx) isSeparator ifTrue:[^ idx].
	idx := idx + 1
    ].
    ^ 0

    "'hello world' indexOfSeparatorStartingAt:3"
! !

!CharacterArray methodsFor:'comparing'!

< something
    "Compare the receiver with the argument and return true if the
     receiver is less than the argument. Otherwise return false.
     Implemented by asking the argument, if it is greater than the receiver."

    ^ (something > self)
!

<= something
    "Compare the receiver with the argument and return true if the
     receiver is less than or equal to the argument. Otherwise return false.
     Implemented as the negation of the receivers greater-than compare."

    ^ (self > something) not
!

= aString
    "Return true, if the argument has the same species and the same size
     as the receiver and all of its elements are equal to the corresponding
     elements of the receiver. Otherwise return false.
     This compare does NOT ignore case differences, 
     therefore 'foo' = 'Foo' will return false.
     Since this is incompatible to ST-80 (at least, V2.x) , this may change."

    |len "{ Class: SmallInteger }"|

    self species == aString species ifFalse:[^ false].
    len := self size.
    len == aString size ifFalse:[^ false].

    1 to:len do:[:i |
	(self at:i) = (aString at:i) ifFalse:[^ false]
    ].
    ^ true

    "
     'foo' = 'Foo'  
     'foo' = 'bar'  
     'foo' = 'foo'   
    "
!

> aString
    "Return true if the receiver is greater than the argument; false otherwise. 
     Both are compared element by element; the first pair of differing 
     elements decides. If there is no difference within the common length, 
     the longer one is the greater.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true; use #sameAs: to compare ignoring cases.. 
     Since this is incompatible to ST-80, this may change."

    |myLen     "{ Class: SmallInteger }"
     otherLen  "{ Class: SmallInteger }" 
     commonLen "{ Class: SmallInteger }" 
     mine other|

    myLen := self size.
    otherLen := aString size.
    commonLen := myLen min:otherLen.

    1 to:commonLen do:[:i |
	mine := self at:i.
	other := aString at:i.
	mine > other ifTrue:[^ true].
	mine < other ifTrue:[^ false].
    ].
    "no difference so far - the sizes decide"
    ^ myLen > otherLen
!

>= something
    "Compare the receiver with the argument and return true if the
     receiver is greater than or equal to the argument.
     Otherwise return false.
     Implemented as the negation of asking the argument, if it is
     greater than the receiver."

    ^ (something > self) not
!

compareWith:aString
    "Return 1 if the receiver is greater, 0 if equal and -1 if less than 
     the argument. Both are compared element by element; the first pair of 
     differing elements decides. If there is no difference within the common 
     length, the sizes decide.
     Case differences are NOT ignored, thus
     'foo' compareWith: 'Foo' will return 1."

    |myLen     "{ Class: SmallInteger }"
     otherLen  "{ Class: SmallInteger }" 
     commonLen "{ Class: SmallInteger }" 
     mine other|

    myLen := self size.
    otherLen := aString size.
    commonLen := myLen min:otherLen.

    1 to:commonLen do:[:i |
	mine := self at:i.
	other := aString at:i.
	mine > other ifTrue:[^ 1].
	mine < other ifTrue:[^ -1].
    ].
    "no difference so far - the sizes decide"
    myLen > otherLen ifTrue:[^ 1].
    myLen < otherLen ifTrue:[^ -1].
    ^ 0
!

hash
    "return an integer useful as a hash-key.
     The value is computed by inline C code from the bytes of the receiver,
     walking from the last byte to the first. Only the low 5 bits of each
     byte are used; thus, upper- and lowercase ascii letters contribute
     the same value. The result is limited to 30 bits and returned as 
     a SmallInteger."

%{  /* NOCONTEXT */

    REGISTER int g, val;
    REGISTER unsigned char *cp, *cp0;
    int l;

    cp = _stringVal(self);
    l = _stringSize(self);
    if (__qClass(self) != String) {
	/*
	 * NOTE(review): presumably skips the bytes occupied by the named
	 * instance variables of subclass instances - confirm
	 */
	int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	cp += n;
	l -= n;
    }

    /*
     * this is the dragon-book algorithm with a funny start
     * value (to give short strings a number above 8192)
     */
    val = 12345;
    /* cp0 marks the first byte; cp starts at the last byte and walks down */
    for (cp0 = cp, cp += l - 1; cp >= cp0; cp--) {
	val = (val << 5) + (*cp & 0x1F);
	/* fold the bits above bit 24 back into the low bits */
	if (g = (val & 0x3E000000))
	    val ^= g >> 25 /* 23 */ /* 25 */;
	val &= 0x3FFFFFFF;
    }

    /* let the (odd-made) length contribute too; keep the result in 30 bits */
    if (l) {
	l |= 1; 
	val = (val * l) & 0x3FFFFFFF;
    }

    RETURN ( _MKSMALLINT(val) );
%}
!

sameAs:aString
    "Return true, if the receiver and the argument have the same size
     and all corresponding characters are either identical or equal
     after conversion to lowercase; false otherwise.
     (i.e. compare like =, but ignore case differences)"

    |len "{ Class: SmallInteger }"
     mine other|

    len := self size.
    len == aString size ifFalse:[^ false].

    1 to:len do:[:i |
	mine := self at:i.
	other := aString at:i.
	"only convert, if the characters are not identical anyway"
	mine == other ifFalse:[
	    mine asLowercase = other asLowercase ifFalse:[^ false]
	]
    ].
    ^ true

    "
     'foo' sameAs: 'Foo'   
     'foo' sameAs: 'bar' 
     'foo' sameAs: 'foo'   
    "
!

sameCharacters:aString
    "count & return the number of positions at which the receiver and 
     the argument, aString contain the same character (ignoring case). 
     Only the positions up to the size of the shorter one are compared."

    |commonLen "{ Class: SmallInteger }"
     mine other count|

    commonLen := self size min:(aString size).

    count := 0.
    1 to:commonLen do:[:i |
	mine := self at:i.
	other := aString at:i.
	(mine == other) ifTrue:[
	    count := count + 1
	] ifFalse:[
	    mine asLowercase = other asLowercase ifTrue:[
		count := count + 1
	    ]
	]
    ].
    ^ count

    "
     'foobarbaz' sameCharacters: 'foo'   
     'foobarbaz' sameCharacters: 'Foo'   
     'foobarbaz' sameCharacters: 'baz'   
    "
! !

!CharacterArray methodsFor:'converting'!

asArrayOfSubstrings
    "return an array of substrings from the receiver, interpreting
     separators (i.e. spaces & newlines) as word-delimiters.
     This is a compatibility method - the actual work is done in
     asCollectionOfWords; its result is converted to an array here."

    ^ self asCollectionOfWords asArray

    "
     '1 one two three four 5 five' asArrayOfSubstrings  
    "
!

asCollectionOfLines
    "return a collection containing the lines (separated by cr) 
     of the receiver. If multiple cr's occur in a row, the result will
     contain empty strings.
     The work is done by asCollectionOfSubstringsSeparatedBy:, with the
     cr character as separator."

    ^ self asCollectionOfSubstringsSeparatedBy:Character cr

    "
     '1 one\2 two\3 three\4 four\5 five' withCRs asCollectionOfLines
     '1 one\\\\2 two\3 three' withCRs asCollectionOfLines  
    "
!

asCollectionOfSubstringsSeparatedBy:aCharacter
    "return an OrderedCollection containing the parts of the receiver,
     which are separated by aCharacter. The separators themselves are not
     included. If aCharacter occurs multiple times in a row (or at the 
     beginning or end), the result will contain empty strings.
     The result always contains one element more than there are 
     occurrences of aCharacter in the receiver."

    |parts
     count "{ Class:SmallInteger }"
     from  "{ Class:SmallInteger }"
     to    "{ Class:SmallInteger }" |

    "
     count first, to avoid regrowing of the OC
    "
    count := (self occurrencesOf:aCharacter) + 1.
    parts := OrderedCollection new:count.

    from := 1.
    count timesRepeat:[
	to := self indexOf:aCharacter startingAt:from.
	to == 0 ifTrue:[
	    "no more separators - the rest is the last part"
	    to := self size
	] ifFalse: [
	    to := to - 1.
	].

	(to < from) ifTrue: [
	    parts add:(self species new:0)
	] ifFalse: [
	    parts add:(self copyFrom:from to:to)
	].
	"continue after the separator"
	from := to + 2
    ].
    ^ parts

    "
     '1 one:2 two:3 three:4 four:5 five' withCRs asCollectionOfSubstringsSeparatedBy:$: 
     '1 one 2 two 3 three 4 four 5 five' withCRs asCollectionOfSubstringsSeparatedBy:Character space
    "
!

asCollectionOfSubstringsSeparatedByAny:aCollectionOfSeparators
    "return an OrderedCollection containing the words of the receiver,
     where every character included in aCollectionOfSeparators is 
     treated as a separator. Multiple separators in a row are skipped;
     thus, the result never contains empty strings.
     This allows breaking up strings using any character as separator."

    |result
     from "{ Class:SmallInteger }" 
     to   "{ Class:SmallInteger }" 
     len  "{ Class:SmallInteger }"|

    result := OrderedCollection new.
    len := self size.
    from := 1.
    [from <= len] whileTrue:[
	"skip multiple separators"
	[aCollectionOfSeparators includes:(self at:from)] whileTrue:[
	    from := from + 1 .
	    from > len ifTrue:[
		"only separators up to the end"
		^ result
	    ].
	].

	"from is now the index of a words first character"
	to := self indexOfAny:aCollectionOfSeparators startingAt:from.
	to == 0 ifTrue:[
	    "the last word"
	    result add:(self copyFrom:from to:len).
	    ^ result
	].
	result add:(self copyFrom:from to:(to - 1)).
	from := to
    ].
    ^ result

    "
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:#($:)
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:':' 
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:(Array with:$: with:Character space) 
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:': ' 
     'h1e2l3l4o' asCollectionOfSubstringsSeparatedByAny:($1 to: $9) 
    "
!

asCollectionOfWords
    "return an OrderedCollection containing the words (separated by 
     whitespace) of the receiver. Multiple occurences of whitespace 
     characters will be treated like one - i.e. whitespace is skipped.
     Thus, the result never contains empty strings."

    |result
     from "{ Class:SmallInteger }" 
     to   "{ Class:SmallInteger }" 
     len  "{ Class:SmallInteger }"|

    result := OrderedCollection new.
    len := self size.
    from := 1.
    [from <= len] whileTrue:[
	"skip whitespace in front of the next word"
	from := self indexOfNonSeparatorStartingAt:from.
	from == 0 ifTrue:[
	    "only whitespace up to the end"
	    ^ result
	].
	to := self indexOfSeparatorStartingAt:from.
	to == 0 ifTrue:[
	    "the last word"
	    result add:(self copyFrom:from to:len).
	    ^ result
	].
	result add:(self copyFrom:from to:(to - 1)).
	from := to
    ].
    ^ result

    "
     'hello world isnt this nice' asCollectionOfWords
     '    hello    world   isnt   this   nice  ' asCollectionOfWords
     'hello' asCollectionOfWords
     '' asCollectionOfWords
     '      ' asCollectionOfWords
    "
!

asComposedText
    "return whatever ComposedText creates from the receiver
     (via ComposedText fromString:)"

    ^ ComposedText fromString:self
!

asFilename
    "return a Filename with pathname taken from the receiver
     (created via Filename named:)"

    ^ Filename named:self
!

asFloat
    "read a float number from the receiver.
     A number is read from the receiver (via Number readFromString:) 
     and then converted to a float.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ (Number readFromString:self) asFloat

    "
     '0.123' asFloat 
     '12345' asFloat
     '(1/5)' asFloat
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asFloat] 
    "
!

asInteger
    "read an integer from the receiver (via Integer readFromString:).
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ Integer readFromString:self

    "
     '12345678901234567890' asInteger
     '-1234' asInteger
     '0.123' asInteger   <- reader stops at ., returning 0 here
     '0.123' asNumber    <- returns what you expect
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asInteger] 
    "
!

asLowercase
    "return a new string (of the receivers species) of the same size, 
     in which every character of the receiver has been converted 
     to lowercase. The receiver is not changed."

    |result
     len "{ Class: SmallInteger }" |

    len := self size.
    result := self species new:len.
    1 to:len do:[:idx |
	|ch|

	ch := self at:idx.
	result at:idx put:ch asLowercase
    ].
    ^ result

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asLowercaseFirst
    "return a copy of the receiver, in which the first character 
     has been converted to lowercase; all other characters are left 
     as they are. For an empty receiver, an (empty) copy is returned."

    |result len|

    len := self size.
    result := self copyFrom:1 to:len.
    len > 0 ifFalse:[^ result].

    result at:1 put:(result at:1) asLowercase.
    ^ result

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asNumber
    "read a number from the receiver (via Number readFromString:).
     Notice, that (in contrast to ST-80) errors may occur during the read, 
     so you better setup some signal handler when using this method.
     This may change if ANSI specifies it."

"/ ST-80 behavior (kept for reference; returns 0 instead of raising an error):
"/  ^ Number readFromString:self onError:0

    ^ Number readFromString:self

    "
     '123'     asNumber
     '123.567' asNumber
     '(5/6)'   asNumber
     'foo'     asNumber
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asNumber] 
    "
!

asNumberFromFormatString:ignored
    "read a number from the receiver, ignoring any nonDigit characters.
     This is typically used to convert from strings which include
     currency signs or thousands separators. However, this method also 
     ignores the decimal point and sign (if any) and therefore should be 
     used with care.
     The argument is not used at all.
     Returns 0, if the receiver contains no digits."

    |digits|

    "/ select: keeps the digit characters themselves;
    "/ (collect: would return the results of the isDigit tests instead,
    "/  from which no number can be read)
    digits := self select:[:char | char isDigit].
    ^ Number readFromString:digits onError:0

    "
     'USD 123' asNumberFromFormatString:'foo'
     'DM 123'  asNumberFromFormatString:'foo'
     '123'     asNumberFromFormatString:'foo'
     '123.567' asNumberFromFormatString:'foo'
     '(5/6)'   asNumberFromFormatString:'foo'
     'foo'     asNumberFromFormatString:'foo'
    "
!

asSingleByteString
    "return the receiver converted to a 'normal' string,
     i.e. a new instance of String created from the receiver"

    ^ String fromString:self
!

asString
    "return myself - I am a string.
     Notice, that the receiver itself is returned - not a copy."

    ^ self
!

asStringCollection
    "return a collection of lines from myself
     (created via StringCollection from:)"

    ^ StringCollection from:self
!

asTwoByteString
    "return the receiver converted to a two-byte string,
     i.e. a new instance of TwoByteString created from the receiver"

    ^ TwoByteString fromString:self
!

asUppercase
    "return a new string (of the receivers species) of the same size, 
     in which every character of the receiver has been converted 
     to uppercase. The receiver is not changed."

    |result
     len "{ Class: SmallInteger }" |

    len := self size.
    result := self species new:len.
    1 to:len do:[:idx |
	|ch|

	ch := self at:idx.
	result at:idx put:ch asUppercase
    ].
    ^ result

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
    "
!

asUppercaseFirst
    "return a copy of the receiver, in which the first character 
     has been converted to uppercase; all other characters are left 
     as they are. For an empty receiver, an (empty) copy is returned."

    |result len|

    len := self size.
    result := self copyFrom:1 to:len.
    len > 0 ifFalse:[^ result].

    result at:1 put:(result at:1) asUppercase.
    ^ result

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
     'HelloWorld' asUppercaseFirst   
    "
!

replaceFrom:aString decode:encoding 
    "this is an experimental interface - unfinished.

     Replace the characters of the receiver by the characters of aString,
     translating them via the decoder table for encoding (a symbol), as 
     provided by the receivers class. aString must contain at least as many
     characters as the receiver. Characters for which the table contains 
     no other code - and characters with a code beyond the table - are 
     copied unchanged."

    |table|

    table := self class decoderTableFor:encoding.
    table isNil ifTrue:[
	"NOTE(review): a one-argument replaceFrom: is not defined in the 
	 visible part of this file - confirm that it exists"
	^ self replaceFrom:aString
    ].

    1 to:self size do:[:index |
	|char oldCode newCode|

	char := aString at:index.
	oldCode := char asciiValue.
	"/ the table has no entries for codes beyond its size
	oldCode < table size ifTrue:[
	    newCode := table at:(oldCode + 1).
	    newCode ~~ oldCode ifTrue:[
		char := Character value:newCode
	    ]
	].
	"/ store in any case; formerly, only translated characters were
	"/ stored and all others were never copied from aString
	self at:index put:char
    ].

    "Created: 20.10.1995 / 23:00:09 / cg"
    "Modified: 20.10.1995 / 23:08:16 / cg"
!

string
    "return the receiver itself - I am a string"

    ^ self
!

tokensBasedOn:aCharacter
    "this is an ST-80 alias for the ST/X method
	asCollectionOfSubstringsSeparatedBy:
     i.e. return a collection containing the parts of the receiver, 
     which are separated by aCharacter"

    ^ self asCollectionOfSubstringsSeparatedBy:aCharacter

    "
     'hello:world:isnt:this nice' tokensBasedOn:$:
     'foo,bar,baz' tokensBasedOn:$,
     '/etc/passwd' asFilename readStream nextLine tokensBasedOn:$:
    "
! !

!CharacterArray methodsFor:'copying'!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string.
     Here, it is simply implemented by two consecutive concatenations."

    ^ self , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected and the receiver
     is known to be a string.
     Here, it is simply implemented by three consecutive concatenations."

    ^ self , string1 , string2 , string3
! !

!CharacterArray methodsFor:'displaying'!

displayOn:aGc x:x y:y
    "display the receiver in a graphicsContext - this method allows
     strings to be used like DisplayObjects.
     The drawing is done by aGc (via displayString:x:y:), 
     whose result is returned."

    ^ aGc displayString:self x:x y:y.
! !

!CharacterArray methodsFor:'padded copying'!

leftPaddedTo:size
    "return a new string of length size, which contains the receiver
     right-adjusted (i.e. padded on the left).
     Characters on the left are filled with spaces.
     If the receivers size is equal or greater than the length argument, 
     the original receiver is returned unchanged.
     The work is done by leftPaddedTo:with:, with a space as pad character."

    ^ self leftPaddedTo:size with:(Character space)

    "
     'foo' leftPaddedTo:10  
     'fooBar' leftPaddedTo:5      
     123 printString leftPaddedTo:10        
    "
!

leftPaddedTo:size with:padCharacter
    "return a new string of length size, which contains the receiver
     right-adjusted; the positions in front of it are filled with 
     padCharacter.
     If the receiver is not shorter than size, the receiver itself 
     (not a copy) is returned."

    |myLen padded|

    myLen := self size.
    "nothing to pad"
    (myLen < size) ifFalse:[^ self].

    "start with pad characters only, then put myself at the right end"
    padded := self species new:size withAll:padCharacter.
    padded replaceFrom:(size - myLen + 1) with:self.
    ^ padded

    "
     'foo' leftPaddedTo:10 with:$.      
     'fooBar' leftPaddedTo:5 with:$.      
     123 printString leftPaddedTo:10 with:$.        
     (' ' , 123 printString) leftPaddedTo:10 with:$.        
     (Float pi printString) leftPaddedTo:15 with:(Character space)  
     (Float pi printString) leftPaddedTo:15 with:$-           
     (' ' , Float pi class name) leftPaddedTo:15 with:$.     
    "
!

paddedTo:newSize
    "answer a string holding the receiver's characters followed by as many
     spaces as are needed to reach a length of newSize.
     A receiver which is already newSize characters long (or longer) is
     answered unchanged."

    |blank|

    blank := Character space.
    ^ self paddedTo:newSize with:blank

    "
     'foo' paddedTo:10            
     123 printString paddedTo:10 
    "
!

paddedTo:newSize with:padCharacter
    "answer a string holding the receiver's characters followed by as many
     padCharacters as are needed to reach a length of newSize.
     A receiver which is already newSize characters long (or longer) is
     answered unchanged."

    |mySize padded|

    mySize := self size.
    mySize >= newSize ifTrue:[^ self].

    "start with a string full of pad characters,
     then overwrite its head with the receiver"
    padded := self species new:newSize withAll:padCharacter.
    padded replaceFrom:1 to:mySize with:self.
    ^ padded

    "
     'foo' paddedTo:10 with:$.             
     123 printString paddedTo:10 with:$*   
     (Float pi printString) paddedTo:15 with:(Character space)  
     (Float pi printString) paddedTo:15 with:$-  
     (Float pi class name , ' ') paddedTo:15 with:$.  
    "
!

centerPaddedTo:newSize
    "answer a string of length newSize which holds the receiver's characters
     centered, with spaces filling the room on both sides.
     A receiver which is already newSize characters long (or longer) is
     answered unchanged."

    |blank|

    blank := Character space.
    ^ self centerPaddedTo:newSize with:blank

    "
     'foo' centerPaddedTo:10            
     123 printString centerPaddedTo:10 
    "

    "Created: 25.11.1995 / 10:53:57 / cg"
!

centerPaddedTo:size with:padCharacter
    "answer a string of length size which holds the receiver centered,
     with padCharacter filling the room on both sides.
     If the free room cannot be split evenly, the extra pad character
     goes to the right side.
     A receiver which is already size characters long (or longer) is
     answered unchanged."

    |mySize leftRoom padded|

    mySize := self size.
    mySize >= size ifTrue:[^ self].

    leftRoom := (size - mySize) // 2.
    padded := self species new:size withAll:padCharacter.
    padded replaceFrom:(leftRoom + 1) with:self.
    ^ padded

    "
     'foo' centerPaddedTo:11 with:$.     
     'fooBar' centerPaddedTo:5 with:$.      
     123 printString centerPaddedTo:10 with:$.        
     (' ' , 123 printString) centerPaddedTo:10 with:$.        
     (Float pi printString) centerPaddedTo:15 with:(Character space)  
     (Float pi printString) centerPaddedTo:15 with:$-           
     (' ' , Float pi class name) centerPaddedTo:15 with:$.     
    "
! !

!CharacterArray methodsFor:'pattern matching'!

findMatchString:matchString
    "like findString/indexOfSubCollection, but matchString may contain
     match patterns. The search starts at the first character and is
     case sensitive.
     Answer the index where the pattern matches, or 0 if it matches nowhere."

    |notFound|

    notFound := [0].
    ^ self findMatchString:matchString startingAt:1 ignoreCase:false ifAbsent:notFound
!

findMatchString:matchString startingAt:index
    "like findString, but matchString may contain match patterns.
     The search starts at index and is case sensitive.
     Answer the index where the pattern matches, or 0 if it matches nowhere."

    |notFound|

    notFound := [0].
    ^ self findMatchString:matchString startingAt:index ignoreCase:false ifAbsent:notFound
!

findMatchString:matchString startingAt:index ignoreCase:ignoreCase ifAbsent:exceptionBlock
    "like findString, but allowing match patterns.
     Find matchString in the receiver, starting the search at index.
     If ignoreCase is true, lower/uppercase are considered the same.
     Answer the index where the pattern matches; if it matches nowhere,
     answer the result of evaluating exceptionBlock.
     An empty matchString matches immediately (index is answered).
     This is a q&d hack - not very efficient"

    |firstChar 
     startIndex "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     realMatchString|

    matchString size == 0 ifTrue:[^ index]. "empty string matches"

    "the pattern only has to match at the beginning of the remaining text;
     a trailing * lets it swallow whatever follows"
    realMatchString := matchString.
    (realMatchString endsWith:$*) ifFalse:[
	realMatchString := realMatchString , '*'
    ].

    mySize := self size.
    firstChar := realMatchString at:1.

    "the shortcut below locates candidate positions with #indexOf:startingAt:,
     which compares characters exactly. It can therefore only be used if
     the pattern starts with a plain character AND case matters;
     otherwise every position has to be tried."
    (ignoreCase or:[firstChar asString includesMatchCharacters]) ifTrue:[
	index to:mySize do:[:col |
	    (realMatchString match:self from:col to:mySize ignoreCase:ignoreCase)
	    ifTrue:[^ col]
	].
	^ exceptionBlock value
    ].

    startIndex := self indexOf:firstChar startingAt:index.
    [startIndex == 0] whileFalse:[
	(realMatchString match:self from:startIndex to:mySize ignoreCase:ignoreCase)
	ifTrue:[^ startIndex].
	startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
    ].
    ^ exceptionBlock value

    "
     'one two three four' findMatchString:'o[nu]'
     'one two three four' findMatchString:'o[nu]' startingAt:3
     'ONE two' findMatchString:'one' startingAt:1 ignoreCase:true ifAbsent:[0]
    "
!

includesMatchString:matchString
    "like includesString, but matchString may contain match patterns.
     Answer true if the pattern matches somewhere in the receiver,
     false otherwise. The search is case sensitive."

    |where|

    where := self findMatchString:matchString.
    ^ where ~~ 0

    "
     'hello world' includesMatchString:'h*'
     'hello world' includesMatchString:'h[aeiou]llo' 
     'hello world' includesMatchString:'wor*'     
     'hello world' includesMatchString:'woR*'     
    "
!

match:aString
    "treat the receiver as a pattern and answer true if all of aString
     matches it. The pattern may contain the meta characters
     $* (matches any string), $# (matches any single character)
     and [...] (matches one character out of a set).
     Lower/uppercase are considered different.
     NOTICE: match-meta character interpretation is like in unix-matching, 
	     NOT the ST-80 meaning."

    |lastIndex|

    lastIndex := aString size.
    ^ self match:aString from:1 to:lastIndex ignoreCase:false

    "
     '*ute*' match:'computer' 
     '*uter' match:'computer' 
     'uter*' match:'computer' 
     '*ute*' match:'' 
     '[abcd]*' match:'computer' 
     '[abcd]*' match:'komputer' 
     '*some*compl*ern*' match:'this is some more complicated pattern match' 
     '*some*compl*ern*' match:'this is another complicated pattern match' 
    "
!

match:aString from:start to:stop ignoreCase:ignoreCase
    "treat the receiver as a pattern and answer true if the part of aString
     between the indices start and stop matches it.
     The pattern may contain the meta characters
     $* (matches any string), $# (matches any single character)
     and [...] (matches one character out of a set).
     If ignoreCase is true, lower/uppercase are considered the same.
     Answer false (after printing an info message) if the receiver cannot
     be parsed as a pattern.
     NOTICE: match-meta character interpretation is like in unix-matching, 
	     NOT the ST-80 meaning."

    |scanArray|

    "
     the parsed form of the most recently used pattern is remembered in the
     class variable PreviousMatch (as pattern -> scanArray), so that
     repeated searches with the same pattern need not parse it again.
    "
    (PreviousMatch isNil or:[(PreviousMatch key = self) not]) ifTrue:[
	"a different pattern than last time (or the very first one)"
	scanArray := self class matchScanArrayFrom:self.
	scanArray isNil ifTrue:[
	    'CHARARRAY: invalid matchpattern:' infoPrint. self infoPrintNL.
	    ^ false
	].
	PreviousMatch := self -> scanArray
    ] ifFalse:[
	scanArray := PreviousMatch value
    ].

    ^ self class
	matchScan:scanArray 
	from:1 to:scanArray size
	with:aString 
	from:start to:stop 
	ignoreCase:ignoreCase

    "
     '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true 
     '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true  
    "
!

match:aString ignoreCase:ignoreCase
    "treat the receiver as a pattern and answer true if all of aString
     matches it. The pattern may contain the meta characters
     $* (matches any string), $# (matches any single character)
     and [...] (matches one character out of a set).
     If ignoreCase is true, lower/uppercase are considered the same.
     NOTICE: match-meta character interpretation is like in unix-matching, 
	     NOT the ST-80 meaning."

    |lastIndex|

    lastIndex := aString size.
    ^ self match:aString from:1 to:lastIndex ignoreCase:ignoreCase

    "
     '*ute*' match:'COMPUTER' ignoreCase:true  
     '*uter' match:'COMPUTER' ignoreCase:false 
     '[abcd]*' match:'computer' ignoreCase:false 
     '[abcd]*' match:'Computer' ignoreCase:false 
     '[a-k]*' match:'komputer' ignoreCase:false   
     '[a-k]*' match:'zomputer' ignoreCase:false    
     '[a-k]*' match:'Komputer' ignoreCase:false    
     '[a-k]*' match:'Komputer' ignoreCase:true     
     '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true 
     '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true 

     Time millisecondsToRun:[
	Symbol allInstancesDo:[:sym |
	    '[ab]*' match:sym ignoreCase:false
	]
     ]. 
    "
! !

!CharacterArray methodsFor:'printing & storing'!

article
    "answer the indefinite article ('a' or 'an') to put in front of the
     receiver. 'an' is answered if the receiver starts (ignoring case)
     with a vowel other than u, or with an x; 'a' in all other cases.
     The receiver must not be empty."

    |initial|

    initial := (self at:1) asLowercase. 
    initial == $u ifTrue:[^ 'a'].
    (initial isVowel or:[initial == $x]) ifTrue:[^ 'an'].
    ^ 'a'
!

displayString
    "return a string to display the receiver - use storeString to have
     quotes around.
     NOTE(review): the code answers the storeString itself, which does not
     agree with the sentence above - confirm which of the two is intended."

    ^ self storeString
!

printOn:aStream
    "print the receiver on aStream; the receiver's characters are
     appended as they are, with no quotes around them"

    aStream nextPutAll:self
!

printString
    "return a string for printing - thats myself.
     Notice that the receiver itself is answered, not a copy."

    ^ self
! !

!CharacterArray methodsFor:'queries'!

encoding
    "return a symbol naming the receiver's character encoding;
     here, #unknown is answered.
     NOTE(review): presumably redefined by concrete subclasses - confirm."

    ^ #unknown
!

isString
    "return true, if the receiver is some kind of string;
     true is returned here - redefinition of Object>>isString.
     Allows type tests without naming a concrete string class."

    ^ true
! !

!CharacterArray methodsFor:'special string converting'!

chopTo:maxLen
    "if the receiver has no more than maxLen characters, answer it unchanged.
     Otherwise, answer a copy from which characters in the middle have been
     cut out, leaving a string of exactly maxLen characters.
     If maxLen is odd, the kept tail is one character longer than the
     kept head."

    |mySize headLen tailLen|

    mySize := self size.
    mySize <= maxLen ifTrue:[^ self].

    headLen := maxLen // 2.
    tailLen := maxLen odd ifTrue:[headLen + 1] ifFalse:[headLen].
    ^ (self copyFrom:1 to:headLen) , (self copyFrom:mySize - tailLen + 1)

    "
     '12345678901234'   chopTo:15            
     '123456789012345'  chopTo:15         
     '1234567890123456' chopTo:15      
     'aShortString' chopTo:15 
     'aVeryLongNameForAStringThatShouldBeShortened' chopTo:15 
    "
!

contractAtBeginningTo:maxLen
    "if the receiver has no more than maxLen characters, answer it unchanged.
     Otherwise, answer a copy in which characters at the beginning have
     been replaced by '...', giving a string of exactly maxLen characters.
     Useful to abbreviate long entries in tables."

    |mySize tailStart|

    mySize := self size.
    mySize <= maxLen ifTrue:[^ self].

    "the three dots are followed by the last (maxLen - 3) characters"
    tailStart := mySize - (maxLen - 4).
    ^ '...' , (self copyFrom:tailStart) 

    "
     '12345678901234' contractAtBeginningTo:15          
     '123456789012345' contractAtBeginningTo:15          
     '1234567890123456' contractAtBeginningTo:15          
     'aShortString' contractAtBeginningTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtBeginningTo:15
    "
!

contractAtEndTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     at the end have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables.
     If maxLen is less than 3, just the three dots are answered
     (the result is then longer than maxLen)."

    |keep|

    self size > maxLen ifTrue:[
	"keep the first (maxLen - 3) characters, so that head plus dots
	 is exactly maxLen long - in line with contractTo: and
	 contractAtBeginningTo:"
	keep := (maxLen - 3) max:0.
	^ (self copyFrom:1 to:keep) , '...'
    ]

    "
     '12345678901234' contractAtEndTo:15          
     '123456789012345' contractAtEndTo:15          
     '1234567890123456' contractAtEndTo:15          
     'aShortString' contractAtEndTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtEndTo:15 
    "
!

contractTo:maxLen
    "if the receiver has no more than maxLen characters, answer it unchanged.
     Otherwise, answer a copy in which characters in the middle have been
     replaced by '...', giving a string of exactly maxLen characters.
     Useful to abbreviate long entries in tables."

    |mySize   "{ SmallInteger }"
     half     "{ SmallInteger }"
     firstCut lastCut|

    mySize := self size.
    mySize <= maxLen ifTrue:[^ self].

    "the characters firstCut .. lastCut (both inclusive) make room for the dots"
    half := maxLen // 2.
    firstCut := half - 1.
    lastCut := mySize - maxLen + half + 1.
    ^ self copyReplaceFrom:firstCut to:lastCut with:'...'

    "
     '12345678901234' contractTo:15          
     '123456789012345' contractTo:15          
     '1234567890123456' contractTo:15        
     'aShortString' contractTo:15 
     'aVeryLongNameForAStringThatShouldBeShortened' contractTo:15 
    "
!

expandPlaceholdersWith:argArray
    "return a copy of the receiver, where all %i escapes are
     replaced by corresponding arguments from the argArray.
     I.e. 'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this') results
     in the new string 'hello world; how is this'.
     The character after the % is converted with #digitValue and used as
     index into argArray; the printString of the argument is inserted.
     To get a '%' character, use a '%%'-escape.
     A single '%' at the very end of the receiver is copied as is.
     See also bindWith:... for VisualAge compatibility."

    |expandedString next 
     idx   "{ SmallInteger }"
     start "{ SmallInteger }"
     stop  "{ SmallInteger }"|

    expandedString := ''.
    stop := self size.
    start := 1.
    [start <= stop] whileTrue:[
	idx := self indexOf:$% startingAt:start.
	idx == 0 ifTrue:[
	    "no more escapes - the rest is copied literally"
	    ^ expandedString , (self copyFrom:start to:stop)
	].
	"found a %"
	expandedString := expandedString , (self copyFrom:start to:(idx - 1)).
	idx == stop ifTrue:[
	    "nothing follows the % - there is no escape character to look at"
	    ^ expandedString , '%'
	].
	next := self at:(idx + 1).
	(next == $%) ifTrue:[
	    expandedString := expandedString , '%'
	] ifFalse:[
	    expandedString := expandedString , (argArray at:(next digitValue)) printString
	].
	"continue behind the two characters of the escape"
	start := idx + 2
    ].
    ^  expandedString

    "
     'hello %1' expandPlaceholdersWith:#('world') 
     'hello %1; how is %2' expandPlaceholdersWith:#('world' 'this') 
     'hello %2; how is %1' expandPlaceholdersWith:#('world' 'this') 
     '100%' expandPlaceholdersWith:#() 
    "
!

withCRs
    "answer a copy of the receiver in which every \-character
     has been replaced by a cr-character."

    |newline|

    newline := Character cr.
    ^ self copy replaceAll:$\ by:newline

    "
     'hello\world' withCRs
    "
!

withEscapes
    "return a new string consisting of receivers characters
     with all \X-character escapes replaced by corresponding-characters.
     (similar to the way C-language Strings are converted).
     The following escapes are supported:
	\r      return character
	\n      newline character
	\b      backspace character
	\f      formfeed character
	\t      tab character
	\\      the \ character itself
	\0nn    a 0 followed by further digits: octal number defining 
		the characters ascii value (all digits which follow 
		the 0 are consumed)
	\other  other

     A single \ at the very end of the receiver is kept as is.
     If the receiver contains no escape, the receiver itself is returned.

     Notice, that \' is NOT a valid escape, since the general syntax of
     string constants is not affected by this method.

     Although easily implementable, this is NOT done automatically
     by the compiler (due to a lack of a language standard for this).
     However, the compiler may detect sends of #withEscapes to string literals
     and place a modified string constant into the binary/byte-code.
     Therefore, no runtime penalty will be payed for using these escapes.
     (not in pre 2.11 versions)
    "

    |sz      "{ SmallInteger }"
     newSize "{ SmallInteger }"
     srcIdx  "{ SmallInteger }"
     dstIdx  "{ SmallInteger }"
     val     "{ SmallInteger }"
     newString next| 

    "
     first, count the number of escapes, to allow preallocation
     of the new string ...
     (it is faster to scan the string twice than to reallocate it multiple
      times in a WriteStream)
    "
    sz := newSize := self size.
    srcIdx := 1.
    [(srcIdx := self indexOf:$\ startingAt:srcIdx) ~~ 0] whileTrue:[
	srcIdx == sz ifFalse:[
	    "the backslash itself does not show up in the result"
	    newSize := newSize - 1.
	    srcIdx := srcIdx + 1.
	    (self at:srcIdx) == $0 ifTrue:[
		"octal escape: the 0 stands for the resulting character;
		 each further digit shortens the result by one.
		 Only look at characters which are really there."
		[srcIdx < sz and:[(self at:srcIdx + 1) isDigit]] whileTrue:[
		    newSize := newSize - 1. 
		    srcIdx := srcIdx + 1
		]
	    ]
	].
	"srcIdx is at the last character of the escape - continue behind it"
	srcIdx := srcIdx + 1
    ].

    newSize == sz ifTrue:[
	^ self
    ].

    newString := self species new:newSize.
    "
     copy over, replace escapes
    "
    srcIdx := dstIdx := 1.
    [srcIdx <= sz] whileTrue:[
	next := self at:srcIdx.
	srcIdx := srcIdx + 1.
	(next == $\ and:[srcIdx <= sz]) ifTrue:[
	    next := self at:srcIdx.
	    srcIdx := srcIdx + 1.
	    next == $r ifTrue:[
		next := Character return
	    ] ifFalse:[
		next == $n ifTrue:[
		    next := Character nl
		] ifFalse:[
		    next == $b ifTrue:[
			next := Character backspace
		    ] ifFalse:[
			next == $f ifTrue:[
			    next := Character newPage
			] ifFalse:[
			    next == $t ifTrue:[
				next := Character tab
			    ] ifFalse:[
				next == $0 ifTrue:[
				    "collect the digits which follow the 0;
				     the first non-digit is left alone, it
				     is an ordinary character of the text"
				    val := 0.
				    [srcIdx <= sz and:[(self at:srcIdx) isDigit]] whileTrue:[
					val := val * 8 + (self at:srcIdx) digitValue.
					srcIdx := srcIdx + 1
				    ].
				    next := Character value:val
				]
			    ]
			]
		    ]
		]
	    ]
	].
	newString at:dstIdx put:next.
	dstIdx := dstIdx + 1
    ].
    ^ newString

    "
     'hello world' withEscapes  
     'hello\world' withEscapes   
     'hello\world\' withEscapes   
     'hello world\' withEscapes   
     'hello\tworld' withEscapes   
     'hello\nworld\na\n\tnice\n\t\tstring' withEscapes   
     'hello\tworld\n' withEscapes   
     'hello\010world' withEscapes   
     'hello\010' withEscapes   
     'hello\r\nworld' withEscapes   
    "
!

withTabs
    "return a string consisting of the receivers characters
     where leading spaces are replaced by tabulator characters (assuming 8-col tabs).
     Each full group of 8 leading spaces becomes one tab; remaining 
     leading spaces are kept.
     Notice: if the receiver has less than 8 leading spaces, it is 
     returned unchanged; otherwise a new string is returned.
     Limitation: only the very first spaces are replaced 
		 (i.e. if the receiver contains newLine characters,
		  no tabs are inserted after those lineBreaks)"

    |idx   "{ SmallInteger }" 
     nTabs "{ SmallInteger }" 
     newString|

    idx := self findFirst:[:c | (c ~~ Character space)].
    idx == 0 ifTrue:[
	"empty, or nothing but spaces: all of the receiver is leading space.
	 (without this, nTabs would become -1 below)"
	idx := self size + 1
    ].
    nTabs := (idx-1) // 8.
    nTabs == 0 ifTrue:[^ self].

    "any tabs"
    newString := self species new:(self size - (nTabs * 7)).
    newString atAll:(1 to:nTabs) put:(Character tab).
    "the rest of the receiver follows behind the replaced spaces"
    newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
    ^ newString

    "
     '12345678901234567890' withTabs 
     '       8901234567890' withTabs 
     '        901234567890' withTabs  
     '               67890' withTabs
     '                7890' withTabs
     '                 890' withTabs
     '        ' withTabs
     '' withTabs
    "
!

withTabsExpanded
    "return a string consisting of the receivers characters,
     where all tabulator characters are expanded into spaces (assuming 8-col tabs). 
     Tab stops are at columns 1, 9, 17 ...; every tab is replaced by 
     1 to 8 spaces, so that the following character lands on a tab stop.
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings."

    |col     "{ SmallInteger }" 
     nSpaces "{ SmallInteger }" 
     str|

    (self includes:(Character tab)) ifFalse:[^ self].
    str := WriteStream on:String new.

    "col is the (1-based) column where the next character will be placed"
    col := 1.
    self do:[:ch |
	ch == Character tab ifTrue:[
	    "distance to the next tab stop; 
	     this is 1 (not 9) if col is the last column before a stop"
	    nSpaces := 8 - ((col - 1) \\ 8).
	    nSpaces timesRepeat:[str space].
	    col := col + nSpaces
	] ifFalse:[
	    str nextPut:ch.
	    "a line break restarts the column count"
	    col := ch == Character cr ifTrue:[1] ifFalse:[col + 1]
	]
    ].
    ^ str contents

    "
     ('1' , Character tab asString , 'x') withTabsExpanded          
     ('12345' , Character tab asString , 'x') withTabsExpanded      
     ('123456' , Character tab asString , 'x') withTabsExpanded     
     ('1234567' , Character tab asString , 'x') withTabsExpanded   
     ('12345678' , Character tab asString , 'x') withTabsExpanded   
     ('123456789' , Character tab asString , 'x') withTabsExpanded 

     (String with:Character tab
	     with:Character tab
	     with:$1) withTabsExpanded

     (String with:Character tab
	     with:$1
	     with:Character tab
	     with:$2) withTabsExpanded  

     (String with:Character tab
	     with:$1
	     with:Character cr
	     with:Character tab
	     with:$2) withTabsExpanded  
    "

    "Modified: 11.12.1995 / 15:27:50 / cg"
!

withoutCRs
    "answer a copy of the receiver in which every cr-character
     has been replaced by a \-character.
     This undoes what withCRs does."

    |newline|

    newline := Character cr.
    ^ self copy replaceAll:newline by:$\
    "
     'hello
world' withoutCRs
    "
!

withoutLeadingSeparators
    "answer the receiver with leading whitespace stripped off.
     Notice: this removes tabs, newlines and any other whitespace too,
     not just spaces.
     The receiver itself is answered if there is nothing to strip;
     an empty string is answered if the receiver consists of whitespace only."

    |firstIdx|

    firstIdx := self indexOfNonSeparatorStartingAt:1.
    firstIdx == 0 ifTrue:[^ ''].
    firstIdx == 1 ifTrue:[^ self].
    ^ self copyFrom:firstIdx

    "
     '    foo    ' withoutLeadingSeparators  
     'foo    '     withoutLeadingSeparators   
     '    foo'     withoutLeadingSeparators  
     '       '     withoutLeadingSeparators   
     'foo'         withoutLeadingSeparators   
     ('  ' , Character tab asString , ' foo   ') withoutLeadingSeparators inspect 
    "
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     Whitespace is space, tab, newline, formfeed.
     The receiver itself is returned if there is nothing to strip;
     an empty string is returned if the receiver is empty or consists 
     of whitespace only.
     Use withoutSpaces, if you want to remove spaces only."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    "skip leading whitespace; for an all-whitespace receiver this
     ends with startIndex behind endIndex (also for a single character)"
    [(startIndex <= endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
	startIndex := startIndex + 1
    ].
    "skip trailing whitespace, but never walk into the skipped head"
    [(endIndex >= startIndex) and:[(self at:endIndex) isSeparator]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSeparators      
     '    foo' withoutSeparators      
     'foo    ' withoutSeparators      
     '       ' withoutSeparators      
     ' ' withoutSeparators      
     ('  foo' , Character tab asString , '    ') withoutSeparators inspect 
    "
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     Notice: this does NOT remove tabs, newline or any other whitespace.
     The receiver itself is returned if there is nothing to strip;
     an empty string is returned if the receiver is empty or consists 
     of spaces only.
     Use withoutSeparators for this."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    "skip leading spaces; for an all-space receiver this
     ends with startIndex behind endIndex (also for a single character)"
    [(startIndex <= endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
	startIndex := startIndex + 1
    ].
    "skip trailing spaces, but never walk into the skipped head"
    [(endIndex >= startIndex) and:[(self at:endIndex) == Character space]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSpaces  
     'foo    '     withoutSpaces   
     '    foo'     withoutSpaces  
     '       '     withoutSpaces   
     ' '           withoutSpaces   
     ('  foo' , Character tab asString , '    ') withoutSpaces inspect 
    "
! !

!CharacterArray methodsFor:'substring searching'!

findString:subString
    "search the receiver for subString, beginning at the first character.
     Answer the index where it was found, or 0 if it is not contained."

    |notFound|

    notFound := [0].
    ^ self indexOfSubCollection:subString startingAt:1 ifAbsent:notFound

    "
     'hello world' findString:'llo'   
     'hello world' findString:'ole'  
    "
!

findString:subString ifAbsent:exceptionBlock
    "search the receiver for subString, beginning at the first character.
     Answer the index where it was found; if it is not contained,
     answer the value of exceptionBlock instead."

    |firstIndex|

    firstIndex := 1.
    ^ self indexOfSubCollection:subString startingAt:firstIndex ifAbsent:exceptionBlock
!

findString:subString startingAt:index
    "search the receiver for subString, beginning the search at index.
     Answer the index where it was found, or 0 if it is not contained."

    |notFound|

    notFound := [0].
    ^ self indexOfSubCollection:subString startingAt:index ifAbsent:notFound

    "
     'hello yello' findString:'llo' startingAt:1   
     'hello yello' findString:'llo' startingAt:5   
     'hello yello' findString:'llo' startingAt:15   
    "
!

findString:subString startingAt:index ifAbsent:exceptionBlock
    "search the receiver for subString, beginning the search at index.
     Answer the index where it was found; if it is not contained,
     answer the value of exceptionBlock instead.
     This is simply another name for indexOfSubCollection:startingAt:ifAbsent:"

    |where|

    where := self indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock.
    ^ where
!

includesString:aString
    "answer true if aString occurs somewhere in the receiver,
     false otherwise. Case is significant."

    |where|

    where := self indexOfSubCollection:aString startingAt:1 ifAbsent:[0].
    ^ where ~~ 0

    "
     'hello world' includesString:'hel' 
     'hello world' includesString:'rld' 
     'hello world' includesString:'llo'  
     'hello world' includesString:'LLO'   
    "
!

indexOfSubCollection:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     An empty subString matches immediately (index is returned).
     Characters are compared with =, so the search does not depend
     on characters being identical objects.
     This is a q&d hack - not very efficient"

    |firstChar
     startIndex "{ Class: SmallInteger }"
     subSize    "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     lastStart  "{ Class: SmallInteger }"
     subIdx     "{ Class: SmallInteger }"
     runIdx     "{ Class: SmallInteger }" |

    subSize := subString size.
    subSize == 0 ifTrue:[^ index]. "empty string matches"
    mySize := self size.
    "a match starting behind lastStart would run over the receivers end"
    lastStart := mySize - subSize + 1.
    firstChar := subString at:1.

    "every occurrence of the first character is a candidate position"
    startIndex := self indexOf:firstChar startingAt:index.
    [startIndex == 0 or:[startIndex > lastStart]] whileFalse:[
	"the first character is known to match; compare the rest
	 and stop at the first difference"
	subIdx := 2.
	runIdx := startIndex + 1.
	[subIdx <= subSize and:[(subString at:subIdx) = (self at:runIdx)]] whileTrue:[
	    subIdx := subIdx + 1.
	    runIdx := runIdx + 1
	].
	subIdx > subSize ifTrue:[
	    ^ startIndex
	].
	startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
    ].
    ^ exceptionBlock value

    "
     'hello world' indexOfSubCollection:'llo' startingAt:1 ifAbsent:[0]
     'hello yello' indexOfSubCollection:'llo' startingAt:5 ifAbsent:[0]
     'hello' indexOfSubCollection:'lox' startingAt:1 ifAbsent:[0]
    "
!

restAfter:keyword withoutSeparators:strip
    "if the receiver starts with keyword, answer whatever follows the keyword;
     otherwise answer nil.
     If strip is true, leading and trailing whitespace is removed from
     the answered rest.
     This is meant for matching and extracting lines of the form:
	something: rest
     where the rest is of interest, but only if the line begins 
     with something. 

     The reason for having such a specialized method here is that this 
     pattern is very common when processing mailboxes, rcs files or 
     nntp/pop3 responses; having it here avoids repeating the code 
     below in many places."

    |tail|

    (self startsWith:keyword) ifFalse:[^ nil].

    tail := self copyFrom:(keyword size + 1).
    strip ifTrue:[
	^ tail withoutSeparators
    ].
    ^ tail

    "
     'foo: hello world' restAfter:'foo:' withoutSeparators:true 
     'funny: something' restAfter:'foo:' withoutSeparators:true 

     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:true 
     'foo:     hello world    ' restAfter:'foo:' withoutSeparators:false 
    "

    "Created: 25.11.1995 / 11:04:18 / cg"
! !

!CharacterArray methodsFor:'testing'!

countWords
    "answer the number of words in the receiver; a word is a run of
     non-separator characters, words are delimited by separators"

    |count   "{ Class: SmallInteger }"
     pos     "{ Class: SmallInteger }"
     mySize  "{ Class: SmallInteger }"
     wordEnd|

    count := 0.
    pos := 1.
    mySize := self size.
    [pos <= mySize] whileTrue:[
	(self at:pos) isSeparator ifTrue:[
	    "between words - step on"
	    pos := pos + 1
	] ifFalse:[
	    "at the beginning of a word - count it and jump to the
	     separator behind it (or behind the end, if there is none)"
	    count := count + 1.
	    wordEnd := self indexOfSeparatorStartingAt:pos.
	    (wordEnd == 0) ifTrue:[
		wordEnd := mySize + 1
	    ].
	    pos := wordEnd
	]
    ].
    ^ count

    "
     'hello world isnt this nice' countWords
    "
!

isAlphaNumeric
    "answer true if the receiver is an alphanumeric word, i.e. if it
     starts with a letter and contains nothing but letters and digits.
     An empty receiver is not considered alphanumeric."

    (self size == 0 or:[(self at:1) isLetter not]) ifTrue:[
	^ false
    ].
    self do:[:each |
	each isLetterOrDigit ifFalse:[^ false]
    ].
    ^ true

    "
     'helloWorld' isAlphaNumeric  
     'foo1234' isAlphaNumeric    
     'f1234' isAlphaNumeric      
     '1234' isAlphaNumeric       
     '+' isAlphaNumeric         
    "
!

isBlank
    "answer true if the receiver consists of space characters only.
     This is also the case for an empty receiver; tabs or newlines
     do not count as blank here."

    self do:[:each |
	each == Character space ifFalse:[^ false]
    ].
    ^ true
!

levenshteinTo:aString
    "answer the levenshtein distance to the argument, aString;
     this value corresponds to the number of replacements that have to be
     made to get aString from the receiver.
     See IEEE transactions on Computers 1976 Pg 172 ff."

    |substWeight caseWeight insrtWeight deleteWeight|

    "
     in the following, we assume that omitting a character
     is less of an error than inserting an extra character.
     Therefore the different insertion (i) and deletion (d)
     values.
    "
    substWeight := 4.
    caseWeight := 1.
    insrtWeight := 2.
    deleteWeight := 6.

    ^ self levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight

    "
     'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'computer' levenshteinTo:'cmputer'
     'Computer' levenshteinTo:'computer'
    "
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. 
     Answer the levenshtein distance to the argument, aString;
     this value corresponds to the number of replacements that have to be
     made to get aString from the receiver.
     The weight arguments are the costs for the substitution, the
     case-change, the insertion and the deletion of a character.
     See IEEE transactions on Computers 1976 Pg 172 ff"

    |matrix  "delta matrix; rows are indexed by receiver position + 1,
	      columns by argument position + 1"
     len1 "{ Class: SmallInteger }"
     len2 "{ Class: SmallInteger }"
     dim  "{ Class: SmallInteger }"
     dimPlus1 "{ Class: SmallInteger }"
     above current col cost best c1 c2|

    len1 := self size.
    len2 := aString size.

    "create the help-matrix (square, big enough for the longer string)"

    dim := len1 max:len2.
    dimPlus1 := dim + 1.

    matrix := Array new:dimPlus1.
    1 to:dimPlus1 do:[:i |
	matrix at:i put:(Array new:dimPlus1)
    ].

    "init help-matrix: 
     first row holds accumulated insertion costs, 
     first column accumulated deletion costs"

    current := matrix at:1.
    current at:1 put:0.
    1 to:dim do:[:j |
	current at:(j + 1) put:( (current at:j) + insrtWeight )
    ].

    1 to:dim do:[:i |
	 (matrix at:(i + 1)) at:1 put:(  ((matrix at:i) at:1) + deleteWeight )
    ].

    "fill the matrix row by row; each cell gets the cheapest way to
     reach it from its upper-left, left or upper neighbour"

    1 to:len1 do:[:i |
	c1 := self at:i.
	above := matrix at:i.
	current := matrix at:(i + 1).
	1 to:len2 do:[:j |
	    c2 := aString at:j.
	    (c1 == c2) ifTrue:[
		cost := 0
	    ] ifFalse:[
		cost := (c1 asLowercase == c2 asLowercase) 
			    ifTrue:[caseWeight]
			    ifFalse:[substWeight]
	    ].
	    col := j + 1.
	    best := (above at:j) + cost.
	    best := best min:( (current at:j) + insrtWeight).
	    best := best min:( (above at:col) + deleteWeight).
	    current at:col put:best
	]
    ].

    ^ (matrix at:(len1 + 1)) at:(len2 + 1)
!

numArgs
    "treating the receiver as a message selector, return how many arguments would it take.
     One- or two-character selectors which consist of binary operator
     characters only take one argument; otherwise, the number of colons
     is the number of arguments. An empty receiver takes none."

    |binopChars mySize|

    mySize := self size.
    "nothing to look at - do not try to access the first character"
    mySize == 0 ifTrue:[^ 0].

    mySize <= 2 ifTrue:[
	binopChars := '|&-+=*/\<>~@,'.
	mySize == 1 ifTrue:[
	    (binopChars includes:(self at:1)) ifTrue:[^ 1].
	    ^ 0
	].
	((binopChars includes:(self at:1)) 
	and:[binopChars includes:(self at:2)]) ifTrue:[^ 1]
    ].
    ^ self occurrencesOf:$:

    "
     'foo:bar:' numArgs  
     #foo:bar: numArgs    
     'hello' numArgs       
     '+' numArgs   
     '>=' numArgs   
     '' numArgs   
    "
!

partsIfSelector
    "treating the receiver as a message selector, answer a collection of
     its keyword parts (each including its colon).
     A receiver without colon is answered as the only part."

    |partStart "{ Class: SmallInteger }"
     parts colonIdx lastIdx|

    parts := OrderedCollection new.
    partStart := 1.
    lastIdx := self size.
    [
	colonIdx := self indexOf:$: startingAt:partStart + 1.
	"go on as long as there is a colon which is not the final character"
	colonIdx ~~ 0 and:[colonIdx ~~ lastIdx]
    ] whileTrue:[
	parts add:(self copyFrom:partStart to:colonIdx).
	partStart := colonIdx + 1
    ].
    "whatever is left is the last part"
    parts add:(self copyFrom:partStart).
    ^ parts

    "
     'foo:bar:' partsIfSelector     
     #foo:bar: partsIfSelector     
     'hello' partsIfSelector       
     '+' partsIfSelector           
    "
!

spellAgainst: aString 
    "answer an integer between 0 and 100 which tells how similar 
     the argument is to the receiver (100 meaning: all characters match).
     No case conversion is done.
     This algorithm is much simpler (but also less exact) than the
     levenshtein distance. Experiment which is better for your
     application."

    | i1     "{ Class: SmallInteger }"
      i2     "{ Class: SmallInteger }"
      size1  "{ Class: SmallInteger }"
      size2  "{ Class: SmallInteger }"
      score  "{ Class: SmallInteger }"
      maxLen "{ Class: SmallInteger }" 
      c1 c2 |

    size1 := self size.
    size2 := aString size.
    maxLen := size1 max:size2.
    score := 0.
    i1 := i2 := 1.

    "walk through both strings in parallel, counting matching characters"
    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
	c1 := self at:i1.
	c2 := aString at:i2.
	c1 == c2 ifTrue: [
	    score := score+1.             
	    i1 := i1 + 1.                    
	    i2 := i2 + 1
	] ifFalse: [
	    (i2 < size2 and: [c1 == (aString at:i2 + 1)]) ifTrue: [
		"extra character in the argument - skip it"
		i2 := i2 + 1
	    ] ifFalse: [
		(i1 < size1 and: [(self at:i1 + 1) == c2]) ifTrue: [
		    "extra character in the receiver - skip it"
		    i1 := i1 + 1
		] ifFalse: [
		    "different characters - skip both"
		    i1 := i1 + 1.
		    i2 := i2 + 1
		] 
	    ] 
	] 
    ].

    "the test also catches two empty strings (avoids a division by zero)"
    score = maxLen ifTrue: [^ 100].
    ^ 100 * score // maxLen

    " 
     'Smalltalk' spellAgainst: 'Smalltlak' 
     'Smalltalk' spellAgainst: 'smalltlak' 
     'Smalltalk' spellAgainst: 'smalltalk' 
     'Smalltalk' spellAgainst: 'smalltlk'  
     'Smalltalk' spellAgainst: 'Smalltolk'   
    "
! !

!CharacterArray class methodsFor:'documentation'!

version
    "return the revision string of this class's source file.
     NOTE(review): the string looks like an expanded CVS Header keyword,
     i.e. it is presumably maintained by the source code management
     system and should not be edited by hand - confirm."

    ^ '$Header: /cvs/stx/stx/libbasic/Attic/CharArray.st,v 1.45 1995-12-11 17:24:58 cg Exp $'
! !