CharacterArray.st
author claus
Sat, 11 Feb 1995 17:42:09 +0100
changeset 247 9e3057039d60
parent 244 9faa2da0650a
child 255 2b2c5c0facab
permissions -rw-r--r--
asText eliminated

"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

ByteArray subclass:#AbstractString
       instanceVariableNames:''
       classVariableNames:''
       poolDictionaries:''
       category:'Collections-Text'
!

AbstractString comment:'
COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.13 1995-02-11 16:41:53 claus Exp $
'!

!AbstractString class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1994 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

version
"
$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.13 1995-02-11 16:41:53 claus Exp $
"
!

documentation
"
    AbstractString is a superclass for all kinds of Strings (i.e.
    (singleByte-)Strings, TwoByteStrings and whatever comes in the future.

    As the name already implies, this class is abstract, meaning that there are
    no instances of it. All this class does is provide common protocol for 
    concrete subclasses.
"
! !

!AbstractString class methodsFor:'instance creation'!

basicNew
    "return a new empty string"

    ^ self basicNew:0
!

new
    "return a new empty string"

    ^ self basicNew:0
!

fromString:aString
    "return a copy of the argument, aString"

    ^ (self basicNew:(aString size)) replaceFrom:1 with:aString

    "TwoByteString fromString:'hello'"
! !

!AbstractString methodsFor:'converting'!

asUppercase
    "return a copy of myself in uppercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
	newStr at:i put:(self at:i) asUppercase
    ].
    ^ newStr

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
    "
!

asLowercase
    "return a copy of myself in lowercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
	newStr at:i put:(self at:i) asLowercase
    ].
    ^ newStr

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asUppercaseFirst
    "return a copy of myself where the first character is
     converted to uppercase."

    |newString|

    newString := self copy.
    newString size > 0 ifTrue:[
	newString at:1 put:(newString at:1) asUppercase
    ].
    ^ newString

    "
     'helloWorld' asUppercase      
     'helloWorld' asUppercaseFirst 
     'HelloWorld' asUppercaseFirst   
    "
!

asLowercaseFirst
    "return a copy of myself where the first character is
     converted to lowercase."

    |newString|

    newString := self copy.
    newString size > 0 ifTrue:[
	newString at:1 put:(newString at:1) asLowercase
    ].
    ^ newString

    "
     'HelloWorld' asLowercase   
     'HelloWorld' asLowercaseFirst   
    "
!

asString
    "return myself - I am a string"

    ^ self
!

asTwoByteString
    "return the receiver converted to a two-byte string"

    ^ TwoByteString fromString:self
!

asSingleByteString
    "return the receiver converted to a 'normal' string"

    ^ String fromString:self
!

asStringCollection
    "return a collection of lines from myself."

    ^ StringCollection from:self
!

asNumber
    "read a number from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ Number readFromString:self

    "
     '123'     asNumber
     '123.567' asNumber
     '(5/6)'   asNumber
     'foo'     asNumber
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asNumber] 
    "
!

asInteger
    "read an integer from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ Integer readFromString:self

    "
     '12345678901234567890' asInteger
     '-1234' asInteger
     '0.123' asInteger   <- reader stops at ., returning 0 here
     '0.123' asNumber    <- returns what you expect
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asInteger] 
    "
!

asFloat
    "read a float number from the receiver.
     Notice, that errors may occur during the read, so you better
     setup some signal handler when using this method."

    ^ (Number readFromString:self) asFloat

    "
     '0.123' asFloat 
     '12345' asFloat
     '(1/5)' asFloat
     Object errorSignal handle:[:ex | ex returnWith:0] do:['foo' asFloat] 
    "
!

asFilename
    "return a Filename with pathname taken from the receiver"

    ^ Filename named:self
!

asCollectionOfWords
    "return a collection containing the words (separated by whitespace) 
     of the receiver. Multiple occurences of whitespace characters will
     be treated like one - i.e. whitespace is skipped."

    |words
     start  "{ Class:SmallInteger }" 
     stop   "{ Class:SmallInteger }" 
     mySize "{ Class:SmallInteger }"|

    words := OrderedCollection new.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	start := self indexOfNonSeparatorStartingAt:start.
	start == 0 ifTrue:[
	    ^ words
	].
	stop := self indexOfSeparatorStartingAt:start.
	stop == 0 ifTrue:[
	    words add:(self copyFrom:start to:mySize).
	    ^ words
	].
	words add:(self copyFrom:start to:(stop - 1)).
	start := stop
    ].
    ^ words

    "
     'hello world isnt this nice' asCollectionOfWords
     '    hello    world   isnt   this   nice  ' asCollectionOfWords
     'hello' asCollectionOfWords
     '' asCollectionOfWords
     '      ' asCollectionOfWords
    "
!

asCollectionOfSubstringsSeparatedBy:aCharacter
    "return a collection containing the lines (separated by aCharacter) 
     of the receiver. If aCharacter occurs multiple times in a row, 
     the result will contain empty strings."

    |lines myClass
     numberOfLines "{ Class:SmallInteger }"
     startIndex    "{ Class:SmallInteger }"
     stopIndex     "{ Class:SmallInteger }" |

    "count first, to avoid regrowing"

    numberOfLines := (self occurrencesOf:aCharacter) + 1.
    lines := OrderedCollection new:numberOfLines.
    myClass := self species.

    startIndex := 1.
    1 to:numberOfLines do:[:lineNr |
	stopIndex := self indexOf:aCharacter startingAt:startIndex.
	stopIndex == 0 ifTrue:[
	    stopIndex := self size
	] ifFalse: [
	    stopIndex := stopIndex - 1.
	].

	(stopIndex < startIndex) ifTrue: [
	    lines add:(myClass new:0)
	] ifFalse: [
	    lines add:(self copyFrom:startIndex to:stopIndex)
	].
	startIndex := stopIndex + 2
    ].
    ^ lines

    "
     '1 one:2 two:3 three:4 four:5 five' withCRs asCollectionOfSubstringsSeparatedBy:$: 
     '1 one 2 two 3 three 4 four 5 five' withCRs asCollectionOfSubstringsSeparatedBy:Character space
    "
!

asCollectionOfLines
    "return a collection containing the lines (separated by cr) 
     of the receiver. If multiple cr's occur in a row, the result will
     contain empty strings."

    ^ self asCollectionOfSubstringsSeparatedBy:Character cr

    "
     '1 one\2 two\3 three\4 four\5 five' withCRs asCollectionOfLines
     '1 one\\\\2 two\3 three' withCRs asCollectionOfLines  
    "
!

asArrayOfSubstrings
    "return an array of substrings from the receiver, interpreting
     separators (i.e. spaces & newlines) as word-delimiters.
     This is a compatibility method - the actual work is done in
     asCollectionOfWords."

    ^ self asCollectionOfWords asArray

    "
     '1 one two three four 5 five' asArrayOfSubstrings  
    "
!

asCollectionOfSubstringsSeparatedByAny:aCollectionOfSeparators
    "return a collection containing the words (separated by any character
     from aCollectionOfSeparators) of the receiver.
     This allows breaking up strings using any character as separator."

    |words
     start  "{ Class:SmallInteger }" 
     stop   "{ Class:SmallInteger }" 
     mySize "{ Class:SmallInteger }"|

    words := OrderedCollection new.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	"skip multiple separators"
	[aCollectionOfSeparators includes:(self at:start)] whileTrue:[
	    start := start + 1 .
	    start > mySize ifTrue:[
		^ words
	    ].
	].

	stop := self indexOfAny:aCollectionOfSeparators startingAt:start.
	stop == 0 ifTrue:[
	    words add:(self copyFrom:start to:mySize).
	    ^ words
	].
	words add:(self copyFrom:start to:(stop - 1)).
	start := stop
    ].
    ^ words

    "
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:#($:)
     'hello:world:isnt:this nice' asCollectionOfSubstringsSeparatedByAny:(Array with:$: with:Character space) 
     'h1e2l3l4o' asCollectionOfSubstringsSeparatedByAny:($1 to: $9) 
    "
!

contractTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     in the middle have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz halfSize|

    (sz := self size) > maxLen ifTrue:[
	halfSize := maxLen // 2.
	^ self copyReplaceFrom:halfSize - 1
			    to:sz - maxLen + halfSize + 1
			    with:'...'
    ]

    "
     '12345678901234' contractTo:15          
     '123456789012345' contractTo:15          
     '1234567890123456' contractTo:15         
     'aShortString' contractTo:15 
     'aVeryLongNameForAStringThatShouldBeShortened' contractTo:15 
    "
!

contractAtEndTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     at the end have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ self copyReplaceFrom:maxLen - 3
			    with:'...'
    ]

    "
     '12345678901234' contractAtEndTo:15          
     '123456789012345' contractAtEndTo:15          
     '1234567890123456' contractAtEndTo:15          
     'aShortString' contractAtEndTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtEndTo:15 
    "
!

contractAtBeginningTo:maxLen
    "if the receivers size is less or equal to maxLen, return it.
     Otherwise, return a copy of the receiver, where some characters 
     at the beginning have been replaced by '...' for a total string length
     of maxLen. Can be used to abbreviate long entries in tables."

    |sz|

    (sz := self size) > maxLen ifTrue:[
	^ '...' , (self copyFrom:(sz - (maxLen - 4))) 
    ]

    "
     '12345678901234' contractAtBeginningTo:15          
     '123456789012345' contractAtBeginningTo:15          
     '1234567890123456' contractAtBeginningTo:15          
     'aShortString' contractAtBeginningTo:15          
     'aVeryLongNameForAStringThatShouldBeShortened' contractAtBeginningTo:15
    "
! !

!AbstractString methodsFor:'ST/V compatibility'!

replChar:oldChar with:newChar
    "return a copy of the receiver, with all oldChars replaced
     by newChar.
     This is an ST/V compatibility method."

    ^ self copy replaceAll:oldChar by:newChar

    "
     '12345678901234567890' replChar:$0 with:$* 
    "
!

replChar:oldChar withString:newString
    "return a copy of the receiver, with all oldChars replaced
     by newString (i.e. slice in the newString in place of the oldChar).
     This is an ST/V compatibility method."

    |tmpStream|

    tmpStream := WriteStream on:(self class new).
    self do:[:element |
	element = oldChar ifTrue:[
	    tmpStream nextPutAll:newString
	] ifFalse:[
	    tmpStream nextPut:element 
	].
    ].
    ^ tmpStream contents

   "
     '12345678901234567890' replChar:$0 withString:'foo' 
     'a string with spaces' replChar:$  withString:' foo '  
    "
!

trimBlanks
    "return a copy of the receiver without leading
     and trailing spaces.
     This is an ST/V compatibility method."

    ^ self withoutSpaces

    "
     '    spaces at beginning' trimBlanks     
     'spaces at end    ' trimBlanks           
     '    spaces at beginning and end     ' trimBlanks    
     'no spaces' trimBlanks              
    "
!

byteAt:index put:aByte
    "store a byte at given index.
     This is an ST/V compatibility method."

    (aByte == 0) ifTrue:[
	"store a space instead"
	^ super basicAt:index put:(Character space)
    ].
    ^ super at:index put:(Character value:aByte)
! !

!AbstractString methodsFor:'printing & storing'!

article
    "return an article string for the receiver."

    |firstChar|

    firstChar := (self at:1) asLowercase. 
    (firstChar isVowel or:[firstChar == $x]) ifTrue:[
	firstChar ~~ $u ifTrue:[
	     ^ 'an'
	]
    ].
    ^ 'a'
!

printOn:aStream
    "print the receiver on aStream"

    aStream nextPutAll:self
!

printString
    "return a string for printing - thats myself"

    ^ self
!

displayString
    "return a string to display the receiver - use storeString to have
     quotes around."

    ^ self storeString
! !

!AbstractString methodsFor:'comparing'!

hash
    "return an integer useful as a hash-key"

%{  /* NOCONTEXT */

    REGISTER int g, val;
    REGISTER unsigned char *cp, *cp0;
    int l;

    cp = _stringVal(self);
    l = _stringSize(self);
    if (_qClass(self) != String) {
	int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

	cp += n;
	l -= n;
    }

    /*
     * this is the dragon-book algorithm with a funny start
     * value (to give short strings a number above 8192)
     */
    val = 12345;
    for (cp0 = cp, cp += l - 1; cp >= cp0; cp--) {
	val = (val << 5) + (*cp & 0x1F);
	if (g = (val & 0x3E000000))
	    val ^= g >> 25 /* 23 */ /* 25 */;
	val &= 0x3FFFFFFF;
    }

    if (l) {
	l |= 1; 
	val = (val * l) & 0x3FFFFFFF;
    }

    RETURN ( _MKSMALLINT(val) );
%}
!

<= something
    "Compare the receiver with the argument and return true if the
     receiver is less than or equal to the argument. Otherwise return false."

    ^ (self > something) not
!

< something
    "Compare the receiver with the argument and return true if the
     receiver is less than the argument. Otherwise return false."

    ^ (something > self)
!

>= something
    "Compare the receiver with the argument and return true if the
     receiver is greater than or equal to the argument.
     Otherwise return false."

    ^ (something > self) not
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true. 
     This may change."

    |mySize    "{ Class: SmallInteger }"
     otherSize "{ Class: SmallInteger }" 
     c1 c2|

    mySize := self size.
    otherSize := aString size.

    1 to:(mySize min:otherSize) do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	c1 > c2 ifTrue:[^ true].
	c1 < c2 ifTrue:[^ false].
    ].
    ^ mySize > otherSize
!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.
     This compare doe NOT ignore case differences, 
     therefore 'foo' = 'Foo' will return false."

    |mySize    "{ Class: SmallInteger }"
     otherSize |

    aString species == self species ifFalse:[^ false].
    mySize := self size.
    otherSize := aString size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
	(self at:index) = (aString at:index) ifFalse:[^ false].
    ].
    ^ true

    "
     'foo' = 'Foo'  
     'foo' = 'bar'  
     'foo' = 'foo'   
    "
!

sameAs:aString
    "Compare the receiver with the argument like =, but ignore
     case differences. Return true or false"

    |mySize "{ Class: SmallInteger }"
     otherSize c1 c2|

    mySize := self size.
    otherSize := aString size.
    mySize == otherSize ifFalse:[^ false].

    1 to:mySize do:[:index |
	c1 := self at:index.
	c2 := aString at:index.
	c1 == c2 ifFalse:[
	    c1 asLowercase = c2 asLowercase ifFalse:[^ false].
	]
    ].
    ^ true

    "
     'foo' sameAs: 'Foo'   
     'foo' sameAs: 'bar' 
     'foo' sameAs: 'foo'   
    "
! !

!AbstractString methodsFor:'character searching'!

includesMatchCharacters
    "return true if the receiver includes any meta characters (i.e. $* or $#) 
     for match operations; false if not"

    ^ self includesAny:'*#['
!

indexOfSeparator
    "return the index of the first whitespace character"

    ^ self indexOfSeparatorStartingAt:1

    "'hello world' indexOfSeparator"
!

indexOfSeparatorStartingAt:startIndex
    "return the index of the next whitespace character"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
	(self at:index) isSeparator ifTrue:[^ index]
    ].
    ^ 0

    "'hello world' indexOfSeparatorStartingAt:3"
!

indexOfNonSeparatorStartingAt:startIndex
    "return the index of the next non-whitespace character"

    |start  "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"|

    start := startIndex.
    mySize := self size.

    start to:mySize do:[:index |
	(self at:index) isSeparator ifFalse:[^ index]
    ].
    ^ 0

    "
     '    hello world' indexOfNonSeparatorStartingAt:1 
    "
    "
     |s index1 index2|
     s := '   foo    bar      baz'.
     index1 := s indexOfNonSeparatorStartingAt:1.
     index2 := s indexOfSeparatorStartingAt:index1.
     s copyFrom:index1 to:index2 - 1
    "
! !

!AbstractString methodsFor:'substring searching'!

findString:subString 
    "find a substring. if found, return the index;
     if not found, return 0."

    ^ self findString:subString startingAt:1 ifAbsent:[0]

    "
     'hello world' findString:'llo'   
     'hello world' findString:'ole'  
    "
!

findString:subString startingAt:index
    "find a substring, starting at index. if found, return the index;
     if not found, return 0."

    ^ self findString:subString startingAt:index ifAbsent:[0]

    "
     'hello yello' findString:'llo' startingAt:1   
     'hello yello' findString:'llo' startingAt:5   
     'hello yello' findString:'llo' startingAt:15   
    "
!

findString:subString startingAt:index ifAbsent:exceptionBlock
    "find a substring, starting at index. if found, return the index;
     if not found, return the result of evaluating exceptionBlock.
     This is a q&d hack - not very efficient"

    |firstChar found
     startIndex "{ Class: SmallInteger }"
     subSize    "{ Class: SmallInteger }"
     mySize     "{ Class: SmallInteger }"
     runIdx     "{ Class: SmallInteger }" |

    subSize := subString size.
    subSize == 0 ifTrue:[^ index]. "empty string matches"
    mySize := self size.
    firstChar := subString at:1.
    startIndex := self indexOf:firstChar startingAt:index.
    [startIndex == 0] whileFalse:[
	runIdx := startIndex.
	found := true.
	1 to:subSize do:[:i |
	    runIdx > mySize ifTrue:[
		found := false
	    ] ifFalse:[
		(subString at:i) ~~ (self at:runIdx) ifTrue:[
		    found := false
		]
	    ].
	    runIdx := runIdx + 1
	].
	found ifTrue:[
	    ^ startIndex
	].
	startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
    ].
    ^ exceptionBlock value
! !

!AbstractString methodsFor:'pattern matching'!

from:matchStart to:matchStop match:aString from:start to:stop
    "helper for match; return true if the characters from start to stop in
     aString are matching the receivers characters from matchStart to matchStop.
     The receiver may contain meta-match characters $* (to match any string) 
     or $# (to match any character) or [...] (to match from a set of characters).
     This algorithm is not very efficient; for heavy duty pattern matching,
     an interface (primitive) to the regex pattern matching package should be
     added."

    |matchChar 
     mStart "{ Class: SmallInteger }"
     mStop  "{ Class: SmallInteger }"
     sStart "{ Class: SmallInteger }"
     sStop  "{ Class: SmallInteger }"
     mSize  "{ Class: SmallInteger }"
     sSize  "{ Class: SmallInteger }"
     index cont matchLast
     matchSet c1 c2|

    mStart := matchStart.
    mStop := matchStop.
    sStart := start.
    sStop := stop.

    [true] whileTrue:[
	mSize := mStop - mStart + 1.
	sSize := sStop - sStart + 1.

	"empty strings match"
	(mSize == 0) ifTrue:[^ (sSize == 0)].

	matchChar := self at:mStart.

	(matchChar == $#) ifTrue:[
	    "testString empty -> no match"
	    (sSize == 0) ifTrue:[^ false].
	    "# matches single character"
	    ((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
	    "cut off 1st chars and continue"
	    mStart := mStart + 1.
	    sStart := sStart + 1
	] ifFalse:[
	    (matchChar == $[) ifTrue:[
		"testString empty -> no match"
		(sSize == 0) ifTrue:[^ false].

		matchSet := Set new.
		c1 := nil.
		[matchChar == $]] whileFalse:[
		    mStart := mStart + 1.
		    mSize := mSize - 1.
		    matchChar := self at:mStart.
		    ((matchChar == $-) and:[c1 notNil]) ifTrue:[
			mStart := mStart + 1.
			mSize := mSize - 1.
			c2 := self at:mStart.
			(c1 to:c2) do:[:c | matchSet add:c].
			c1 := nil
		    ] ifFalse:[
			(matchChar == $]) ifFalse:[
			    matchSet add:matchChar.
			    c1 := matchChar.
			].
		    ].
		].
		mStart := mStart + 1.
		mSize := mSize - 1.
		(matchSet includes:(aString at:sStart)) ifFalse:[^ false].
		((sSize == 1) and:[mSize == 0]) ifTrue:[^ true].
		"cut off 1st char and continue"
		sStart := sStart + 1
	    ] ifFalse:[
		(matchChar == $*) ifTrue:[
		    "* matches anything"
		    (mSize == 1) ifTrue:[^ true].
		    "testString empty -> matchString not we have no match"
		    (sSize == 0) ifTrue:[^ false].

		    "try to avoid some of the recursion by checking last
		     character and continue with shortened strings if possible"
		    cont := false.
		    (mStop >= mStart) ifTrue:[
			matchLast := self at:mStop.
			(matchLast ~~ $*) ifTrue:[
			    (matchLast == $#) ifTrue:[
				cont := true
			    ] ifFalse:[
				(matchLast == (aString at:sStop)) ifTrue:[
				    cont := true
				]
			    ]
			]
		    ].
		    cont ifFalse:[
			index := sStart.
			[index <= sStop] whileTrue:[
			    (self from:(mStart + 1) to:mStop match:aString 
				  from:index to:sStop) ifTrue:[
				^ true
			    ].
			    index := index + 1
			].
			^ false
		    ].
		    mStop := mStop - 1.
		    sStop := sStop - 1
		] ifFalse:[

		    "testString empty ?"
		    (sSize == 0) ifTrue:[^ false].

		    "first characters equal ?"
		    ((aString at:sStart) ~~ matchChar) ifTrue:[^ false].

		    "avoid recursion if possible"
		    ((sSize == mSize) and:[self = aString]) ifTrue:[^ true].

		    "cut off 1st chars and continue"
		    mStart := mStart + 1.
		    sStart := sStart + 1
		]
	    ]
	]
    ]
!

match:aString
    "return true if aString matches self, where self may contain meta-match 
     characters $* (to match any string) or $# (to match any character)."

    ^ self from:1 to:(self size) match:aString from:1 to:(aString size)

    "
     '*ute*' match:'computer' 
     '*uter' match:'computer' 
     'uter*' match:'computer' 
     '*ute*' match:'' 
     '[abcd]*' match:'computer' 
     '[abcd]*' match:'komputer' 
     '*some*compl*ern*' match:'this is some more complicated pattern match' 
     '*some*compl*ern*' match:'this is another complicated pattern match' 
    "
! !

!AbstractString methodsFor:'testing'!

isBlank
    "return true, if the receiver contains spaces only"

    self do:[:char |
	char ~~ Character space ifTrue:[^ false].
    ].
    ^ true
!

isAlphaNumeric
    "return true, if the receiver is some alphanumeric word;
     i.e. consists of a letter followed by letters or digits."

    self size == 0 ifTrue:[
	"mhmh what is this ?"
	^ false
    ].
    (self at:1) isLetter ifFalse:[^ false].
    self do:[:char |
	char isLetterOrDigit ifFalse:[^ false].
    ].
    ^ true

    "
     'helloWorld' isAlphaNumeric  
     'foo1234' isAlphaNumeric    
     'f1234' isAlphaNumeric      
     '1234' isAlphaNumeric       
     '+' isAlphaNumeric         
    "
!

countWords
    "return the number of words, which are separated by separators"

    |tally "{ Class: SmallInteger }"
     start "{ Class: SmallInteger }"
     mySize "{ Class: SmallInteger }"
     stop ch|

    tally := 0.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
	ch := self at:start.
	ch isSeparator ifTrue:[
	    start := start + 1
	] ifFalse:[
	    stop := self indexOfSeparatorStartingAt:start.
	    (stop == 0) ifTrue:[
		stop := mySize + 1
	    ].
	    tally := tally + 1.
	    start := stop
	]
    ].
    ^ tally

    "
     'hello world isnt this nice' countWords'
    "
!

numArgs
    "treating the receiver as a message selector, return how many arguments would it take"

    |binopChars|

    (self size > 2) ifFalse:[
	binopChars := '|&-+=*/\<>~@,'.
	(self size == 1) ifTrue:[
	    ((binopChars occurrencesOf:(self at:1)) == 0) ifTrue:[^ 0].
	    ^ 1
	].
	((binopChars occurrencesOf:(self at:1)) == 0) ifFalse:[
	    ((binopChars occurrencesOf:(self at:2)) == 0) ifFalse:[^ 1]
	]
    ].
    ^ self occurrencesOf:$:

    "
     'foo:bar:' numArgs  
     #foo:bar: numArgs    
     'hello' numArgs       
     '+' numArgs   
    "
!

partsIfSelector
    "treat the receiver as a message selector, return a collection of parts."

    |idx1 "{ Class: SmallInteger }"
     coll idx2 sz|

    coll := OrderedCollection new.
    idx1 := 1.
    sz := self size.
    [true] whileTrue:[
	idx2 := self indexOf:$: startingAt:idx1 + 1.
	(idx2 == 0 or:[idx2 == sz]) ifTrue:[
	    coll add:(self copyFrom:idx1).
	    ^ coll
	].
	coll add:(self copyFrom:idx1 to:idx2).
	idx1 := idx2 + 1
    ].

    "
     'foo:bar:' partsIfSelector     
     #foo:bar: partsIfSelector     
     'hello' partsIfSelector       
     '+' partsIfSelector           
    "
!

levenshteinTo:aString
    "return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     See IEEE transactions on Computers 1976 Pg 172 ff."

    ^ self levenshteinTo:aString s:4 c:1 i:2 d:6

    "
     'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'
    "
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. 
     return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     The arguments are the costs for substitution, case-change, insertion and 
     deletion of a character.
     See IEEE transactions on Computers 1976 Pg 172 ff"

    |d  "delta matrix"
     len1 "{ Class: SmallInteger }"
     len2 "{ Class: SmallInteger }"
     dim  "{ Class: SmallInteger }"
     prevRow row col 
     dimPlus1 "{ Class: SmallInteger }"
     min pp c1 c2|

    len1 := self size.
    len2 := aString size.

    "create the help-matrix"

    dim := len1 max:len2.
    dimPlus1 := dim + 1.

    d := Array new:dimPlus1.
    1 to:dimPlus1 do:[:i |
	d at:i put:(Array new:dimPlus1)
    ].

    "init help-matrix"

    (d at:1) at:1 put:0.
    row := d at:1.
    1 to:dim do:[:j |
	row at:(j + 1) put:( (row at:j) + insrtWeight )
    ].

    1 to:dim do:[:i |
	 (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
    ].

    1 to:len1 do:[:i |
	c1 := self at:i.
	1 to:len2 do:[:j |
	    c2 := aString at:j.
	    (c1 == c2) ifTrue:[
		pp := 0
	    ] ifFalse:[
		(c1 asLowercase == c2 asLowercase) ifTrue:[
		    pp := caseWeight
		] ifFalse:[
		    pp := substWeight
		]
	    ].
	    prevRow := d at:i.
	    row := d at:(i + 1).
	    col := j + 1.
	    min := (prevRow at:j) + pp.
	    min := min min:( (row at:j) + insrtWeight).
	    min := min min:( (prevRow at:col) + deleteWeight).
	    row at:col put: min
	]
    ].

    ^ (d at:(len1 + 1)) at:(len2 + 1)
!

spellAgainst: aString 
    "return an integer between 0 and 100 indicating how similar 
     the argument is to the receiver.  No case conversion is done.
     This algorithm is much simpler (but also less exact) than the
     levenshtein distance. Experiment which is better for your
     application."

    | i1     "{ Class: SmallInteger }"
      i2     "{ Class: SmallInteger }"
      next1  "{ Class: SmallInteger }"
      next2  "{ Class: SmallInteger }"
      size1  "{ Class: SmallInteger }"
      size2  "{ Class: SmallInteger }"
      score  "{ Class: SmallInteger }"
      maxLen "{ Class: SmallInteger }" |

    size1 := self size.
    size2 := aString size.
    maxLen := size1 max:size2.
    score := 0.
    i1 := i2 := 1.
    [i1 <= size1 and: [i2 <= size2]] whileTrue:[
	next1 := i1 + 1.
	next2 := i2 + 1.
	(self at:i1) == (aString at:i2) ifTrue: [
	    score := score+1.             
	    i1 := next1.                    
	    i2 := next2
	] ifFalse: [
	    (i2 < size2 and: [(self at:i1) == (aString at:next2)]) ifTrue: [
		i2 := next2
	    ] ifFalse: [
		(i1 < size1 and: [(self at:next1) == (aString at:i2)]) ifTrue: [
		    i1 := next1
		] ifFalse: [
		    i1 := next1.
		    i2 := next2
		] 
	    ] 
	] 
    ].

    score = maxLen ifTrue: [^ 100].
    ^ 100 * score // maxLen

    " 
     'Smalltalk' spellAgainst: 'Smalltlak' 
     'Smalltalk' spellAgainst: 'smalltlak' 
     'Smalltalk' spellAgainst: 'smalltalk' 
     'Smalltalk' spellAgainst: 'smalltlk'  
     'Smalltalk' spellAgainst: 'Smalltolk'   
    "
! !

!AbstractString methodsFor:'padded copying'!

paddedTo:newSize with:padCharacter
    "return a new string consisting of the receivers characters,
     plus pad characters up to length.
     If the receivers size is larger than the legth argument, it
     is returned unchanged."

    |s len|

    len := self size.
    len < newSize ifTrue:[
	s := self species new:newSize withAll:padCharacter.
	s replaceFrom:1 to:len with:self.
	^ s
    ]

    "
     'foo' paddedTo:10 with:$.             
     123 printString paddedTo:10 with:$*   
     (Float pi printString) paddedTo:15 with:(Character space)  
     (Float pi printString) paddedTo:15 with:$-  
     (Float pi class name , ' ') paddedTo:15 with:$.  
    "
!

leftPaddedTo:size with:padCharacter
    "return a new string of length size, which contains the receiver
     right-adjusted (i.e. padded on the left).
     Characters on the left are filled with padCharacter.
     If the receivers size is larger than the legth argument, it
     is returned unchanged."

    |len s|

    len := self size.
    (len < size) ifTrue:[
	s := self species new:size withAll:padCharacter.
	s replaceFrom:(size - len + 1) with:self.
	^ s
    ]

    "
     'foo' leftPaddedTo:10 with:$.      
     'fooBar' leftPaddedTo:5 with:$.      
     123 printString leftPaddedTo:10 with:$.        
     (' ' , 123 printString) leftPaddedTo:10 with:$.        
     (Float pi printString) leftPaddedTo:15 with:(Character space)  
     (Float pi printString) leftPaddedTo:15 with:$-           
     (' ' , Float pi class name) leftPaddedTo:15 with:$.     
    "
! !

!AbstractString methodsFor:'copying'!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected"

    ^ self , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected"

    ^ self , string1 , string2 , string3
! !

!AbstractString methodsFor:'filling and replacing'!

withCRs
    "return a new string consisting of receivers characters
     with all \-characters replaced by cr-characters."

    ^ self copy replaceAll:$\ by:(Character cr)
!

withoutCRs
    "return a new collection consisting of receivers elements
     with all cr-characters replaced by \-characters."

    ^ self copy replaceAll:(Character cr) by:$\
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     Notice: this does NOT remove tabs, newline or any other whitespace.
     Use withoutSeparators for this."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
	startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) == Character space]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSpaces  
     'foo    '     withoutSpaces   
     '    foo'     withoutSpaces  
     '       '     withoutSpaces   
     ('  foo' , Character tab asString , '    ') withoutSpaces inspect 
    "
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     Whitespace is space, tab, newline, formfeed.
     Use withoutSpaces, if you want to remove spaces only."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    sz := self size.
    startIndex := 1.
    endIndex := sz.

    [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
	startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
	endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    foo    ' withoutSeparators      
     '    foo' withoutSeparators      
     'foo    ' withoutSeparators      
     '       ' withoutSeparators      
     ('  foo' , Character tab asString , '    ') withoutSeparators inspect 
    "
!

withTabs
    "return a copy of the receiver where leading spaces are
     replaced by tabulator characters (assuming 8-col tabs)"

    |idx nTabs newString|

    idx := self findFirst:[:c | (c ~~ Character space)].
    nTabs := (idx-1) // 8.
    nTabs == 0 ifTrue:[^ self].

    "any tabs"
    newString := self class new:(self size - (nTabs * 7)).
    newString atAll:(1 to:nTabs) put:(Character tab).
    newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
    ^ newString

    "
     '12345678901234567890' withTabs 
     '       8901234567890' withTabs 
     '        901234567890' withTabs  
     '               67890' withTabs
     '                7890' withTabs
     '                 890' withTabs
    "
! !
        
!AbstractString methodsFor:'queries'!

isString
    "return true, if the receiver is some kind of string;
     true is returned here - redefinition of Object>>isString."

    ^ true
!

encoding
    ^ #unknown
! !