--- a/CharacterArray.st Tue Apr 11 16:52:00 1995 +0200
+++ b/CharacterArray.st Thu Apr 20 20:04:43 1995 +0200
@@ -12,7 +12,7 @@
ByteArray subclass:#CharacterArray
instanceVariableNames:''
- classVariableNames:''
+ classVariableNames:'PreviousMatch'
poolDictionaries:''
category:'Collections-Text'
!
@@ -21,7 +21,7 @@
COPYRIGHT (c) 1994 by Claus Gittinger
All Rights Reserved
-$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.17 1995-04-11 14:48:32 claus Exp $
+$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.18 1995-04-20 18:04:43 claus Exp $
'!
!CharacterArray class methodsFor:'documentation'!
@@ -42,7 +42,7 @@
version
"
-$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.17 1995-04-11 14:48:32 claus Exp $
+$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.18 1995-04-20 18:04:43 claus Exp $
"
!
@@ -180,6 +180,10 @@
^ StringCollection from:self
!
+asComposedText
+ ^ ComposedText fromString:self
+!
+
asNumber
"read a number from the receiver.
Notice, that errors may occur during the read, so you better
@@ -440,6 +444,151 @@
'aShortString' contractAtBeginningTo:15
'aVeryLongNameForAStringThatShouldBeShortened' contractAtBeginningTo:15
"
+!
+
+withCRs
+ "return a new string consisting of receivers characters
+ with all \-characters replaced by cr-characters."
+
+ ^ self copy replaceAll:$\ by:(Character cr)
+
+ "
+ 'hello\world' withCRs
+ "
+!
+
+withoutCRs
+ "return a new collection consisting of receivers elements
+ with all cr-characters replaced by \-characters.
+ This is the reverse operation of withCRs."
+
+ ^ self copy replaceAll:(Character cr) by:$\
+ "
+ 'hello
+world' withoutCRs
+ "
+!
+
+withoutSpaces
+ "return a copy of myself without leading and trailing spaces.
+ Notice: this does NOT remove tabs, newline or any other whitespace.
+ Use withoutSeparators for this."
+
+ |startIndex "{ Class: SmallInteger }"
+ endIndex "{ Class: SmallInteger }"
+ sz|
+
+ sz := self size.
+ startIndex := 1.
+ endIndex := sz.
+
+ [(startIndex < endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
+ startIndex := startIndex + 1
+ ].
+ [(endIndex > 1) and:[(self at:endIndex) == Character space]] whileTrue:[
+ endIndex := endIndex - 1
+ ].
+ startIndex > endIndex ifTrue:[
+ ^ ''
+ ].
+ ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
+ ^ self
+ ].
+ ^ self copyFrom:startIndex to:endIndex
+
+ "
+ ' foo ' withoutSpaces
+ 'foo ' withoutSpaces
+ ' foo' withoutSpaces
+ ' ' withoutSpaces
+ (' foo' , Character tab asString , ' ') withoutSpaces inspect
+ "
+!
+
+withoutSeparators
+ "return a copy of myself without leading and trailing whitespace.
+ Whitespace is space, tab, newline, formfeed.
+ Use withoutSpaces, if you want to remove spaces only."
+
+ |startIndex "{ Class: SmallInteger }"
+ endIndex "{ Class: SmallInteger }"
+ sz|
+
+ sz := self size.
+ startIndex := 1.
+ endIndex := sz.
+
+ [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
+ startIndex := startIndex + 1
+ ].
+ [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
+ endIndex := endIndex - 1
+ ].
+ startIndex > endIndex ifTrue:[
+ ^ ''
+ ].
+ ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
+ ^ self
+ ].
+ ^ self copyFrom:startIndex to:endIndex
+
+ "
+ ' foo ' withoutSeparators
+ ' foo' withoutSeparators
+ 'foo ' withoutSeparators
+ ' ' withoutSeparators
+ (' foo' , Character tab asString , ' ') withoutSeparators inspect
+ "
+!
+
+withTabs
+ "return a copy of the receiver where leading spaces are
+ replaced by tabulator characters (assuming 8-col tabs)"
+
+ |idx nTabs newString|
+
+ idx := self findFirst:[:c | (c ~~ Character space)].
+ nTabs := (idx-1) // 8.
+ nTabs == 0 ifTrue:[^ self].
+
+ "any tabs"
+ newString := self class new:(self size - (nTabs * 7)).
+ newString atAll:(1 to:nTabs) put:(Character tab).
+ newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
+ ^ newString
+
+ "
+ '12345678901234567890' withTabs
+ ' 8901234567890' withTabs
+ ' 901234567890' withTabs
+ ' 67890' withTabs
+ ' 7890' withTabs
+ ' 890' withTabs
+ "
+!
+
+withTabsExpanded
+ "return a copy of the receiver where all tabulator characters
+ are expanded into spaces (assuming 8-col tabs)"
+
+ |idx str|
+
+ (self includes:(Character tab)) ifFalse:[^ self].
+ str := WriteStream on:String new.
+
+ idx := 1.
+ self do:[:ch |
+ ch == Character tab ifFalse:[
+ str nextPut:ch.
+ idx := idx + 1
+ ] ifTrue:[
+ (idx \\ 8) to:8 do:[:ii |
+ str space.
+ idx := idx + 1
+ ]
+ ]
+ ].
+ ^ str contents
! !
!CharacterArray methodsFor:'ST/V compatibility'!
@@ -553,8 +702,8 @@
cp = _stringVal(self);
l = _stringSize(self);
- if (_qClass(self) != String) {
- int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
+ if (__qClass(self) != String) {
+ int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));
cp += n;
l -= n;
@@ -608,7 +757,7 @@
receiver is greater than the argument. Otherwise return false.
In contrast to ST-80, case differences are NOT ignored, thus
'foo' > 'Foo' will return true.
- This may change."
+ Since this is incompatible to ST-80, this may change."
|mySize "{ Class: SmallInteger }"
otherSize "{ Class: SmallInteger }"
@@ -629,8 +778,9 @@
= aString
"Compare the receiver with the argument and return true if the
receiver is equal to the argument. Otherwise return false.
- This compare doe NOT ignore case differences,
- therefore 'foo' = 'Foo' will return false."
+ This compare does NOT ignore case differences,
+ therefore 'foo' = 'Foo' will return false.
+ Since this is incompatible to ST-80, this may change."
|mySize "{ Class: SmallInteger }"
otherSize |
@@ -652,9 +802,33 @@
"
!
+trueCompare:aString
+ "Compare the receiver with the argument and return 1 if the receiver is
+ greater, 0 if equal and -1 if less than the argument.
+ Case differences are NOT ignored, thus
+ 'foo' trueCompare: 'Foo' will return 1."
+
+ |mySize "{ Class: SmallInteger }"
+ otherSize "{ Class: SmallInteger }"
+ c1 c2|
+
+ mySize := self size.
+ otherSize := aString size.
+
+ 1 to:(mySize min:otherSize) do:[:index |
+ c1 := self at:index.
+ c2 := aString at:index.
+ c1 > c2 ifTrue:[^ 1].
+ c1 < c2 ifTrue:[^ -1].
+ ].
+ mySize > otherSize ifTrue:[^ 1].
+ mySize < otherSize ifTrue:[^ -1].
+ ^ 0
+!
+
sameAs:aString
"Compare the receiver with the argument like =, but ignore
- case differences. Return true or false"
+ case differences. Return true or false."
|mySize "{ Class: SmallInteger }"
otherSize c1 c2|
@@ -937,8 +1111,10 @@
cont ifFalse:[
index := sStart.
[index <= sStop] whileTrue:[
- (self from:(mStart + 1) to:mStop match:aString
- from:index to:sStop) ifTrue:[
+ (self from:(mStart + 1) to:mStop
+ match:aString
+ from:index to:sStop
+ ignoreCase:ignoreCase) ifTrue:[
^ true
].
index := index + 1
@@ -1007,6 +1183,310 @@
'*some*compl*ern*' match:'this is some more complicated pattern match'
'*some*compl*ern*' match:'this is another complicated pattern match'
"
+!
+
+matchString:matchString startingAt:index ifAbsent:exceptionBlock
+ "find a matchstring, starting at index. if found, return the index;
+ if not found, return the result of evaluating exceptionBlock.
+ This is a q&d hack - not very efficient"
+
+ |firstChar found
+ startIndex "{ Class: SmallInteger }"
+ matchSize "{ Class: SmallInteger }"
+ mySize "{ Class: SmallInteger }"
+ runIdx "{ Class: SmallInteger }"
+ realMatchString|
+
+ matchSize := matchString size.
+ matchSize == 0 ifTrue:[^ index]. "empty string matches"
+
+ realMatchString := matchString.
+ (realMatchString endsWith:$*) ifFalse:[
+ realMatchString := realMatchString , '*'.
+ matchSize := matchSize + 1
+ ].
+
+ mySize := self size.
+ firstChar := realMatchString at:1.
+
+ firstChar asString includesMatchCharacters ifTrue:[
+ index to:mySize do:[:col |
+ (realMatchString from:1 to:matchSize match:self from:col to:mySize ignoreCase:false)
+ ifTrue:[^ col]
+ ].
+ ^ exceptionBlock value.
+ ].
+ startIndex := self indexOf:firstChar startingAt:index.
+ [startIndex == 0] whileFalse:[
+ (realMatchString from:1 to:matchSize match:self from:startIndex to:mySize ignoreCase:false)
+ ifTrue:[^ startIndex].
+ startIndex := self indexOf:firstChar startingAt:(startIndex + 1)
+ ].
+ ^ exceptionBlock value
+!
+
+asMatchScanCollection
+ |coll idx end c1 c2 matchSet previous|
+
+ coll := OrderedCollection new.
+ idx := 1. end := self size.
+ [idx <= end] whileTrue:[
+ |char this|
+
+ char := self at:idx.
+ char == $* ifTrue:[
+ previous ~~ #anyString ifTrue:[
+ this := #anyString
+ ]
+ ] ifFalse:[
+ char == $# ifTrue:[
+ previous ~~ #anyString ifTrue:[
+ this := #any
+ ]
+ ] ifFalse:[
+ char == $[ ifTrue:[
+ matchSet := IdentitySet new.
+ idx := idx + 1.
+ idx > end ifTrue:[^ nil].
+ char := self at:idx.
+ c1 := nil.
+ [char ~~ $]] whileTrue:[
+ ((char == $-) and:[c1 notNil]) ifTrue:[
+ idx := idx + 1.
+ idx > end ifTrue:[^ nil].
+ c2 := self at:idx.
+ (c1 to:c2) do:[:c | matchSet add:c].
+ c1 := nil.
+ idx := idx + 1.
+ ] ifFalse:[
+ (char ~~ $]) ifTrue:[
+ matchSet add:char.
+ c1 := char.
+ idx := idx + 1
+ ]
+ ].
+ idx > end ifTrue:[^ nil].
+ char := self at:idx
+ ].
+ this := matchSet asString
+ ] ifFalse:[
+ this := char
+ ]
+ ]
+ ].
+ this notNil ifTrue:[coll add:this. previous := this].
+ idx := idx + 1
+ ].
+
+ ^ coll asArray
+
+ "
+ '*ute*' asMatchScanCollection
+ '**ute**' asMatchScanCollection
+ '*uter' asMatchScanCollection
+ '[cC]#mpute[rR]' asMatchScanCollection
+ '[abcd]*' asMatchScanCollection
+ '[a-k]*'asMatchScanCollection
+ '*some*compl*ern*' asMatchScanCollection
+ '[a-' asMatchScanCollection
+ '[a-zA-Z]' asMatchScanCollection
+ '[a-z01234A-Z]' asMatchScanCollection
+ "
+!
+
+newmatch:aString ignoreCase:ignoreCase
+ "return true if aString matches self, where self may contain meta-match
+ characters $* (to match any string) or $# (to match any character)."
+
+ |matchScanArray|
+
+ (PreviousMatch notNil
+ and:[PreviousMatch key = self]) ifTrue:[
+ matchScanArray := PreviousMatch value
+ ] ifFalse:[
+ matchScanArray := self asMatchScanCollection.
+ matchScanArray isNil ifTrue:[
+ 'CHARARRAY: invalid matchpattern:' errorPrint. self errorPrintNL.
+ ^ false
+ ].
+ PreviousMatch := self -> matchScanArray.
+ ].
+
+ ^ self matchScan:matchScanArray
+ from:1 to:(matchScanArray size)
+ with:aString
+ from:1 to:(aString size)
+ ignoreCase:ignoreCase
+
+ "
+ '*ute*' newmatch:'COMPUTER' ignoreCase:true
+ '*uter' newmatch:'COMPUTER' ignoreCase:false
+ '[abcd]*' newmatch:'computer' ignoreCase:false
+ '[abcd]*' newmatch:'Computer' ignoreCase:false
+ '[a-k]*' newmatch:'komputer' ignoreCase:false
+ '[a-k]*' newmatch:'zomputer' ignoreCase:false
+ '[a-k]*' newmatch:'Komputer' ignoreCase:false
+ '[a-k]*' newmatch:'Komputer' ignoreCase:true
+ '*some*compl*ern*' newmatch:'this is some more complicated pattern match' ignoreCase:true
+ '*some*compl*ern*' newmatch:'this is another complicated pattern match' ignoreCase:true
+ Time millisecondsToRun:[
+ Symbol allInstancesDo:[:sym |
+ 'A*' match:sym ignoreCase:false
+ ]
+ ].
+ Time millisecondsToRun:[
+ Symbol allInstancesDo:[:sym |
+ 'A*' newmatch:sym ignoreCase:false
+ ]
+ ].
+ "
+!
+
+matchScan:matchScanArray from:matchStart to:matchStop with:aString from:start to:stop ignoreCase:ignoreCase
+ "helper for match; return true if the characters from start to stop in
+ aString are matching the scan in matchScan from matchStart to matchStop.
+ The matchScan is as created by asMatchScan.
+
+ This algorithm is not at all efficient; for heavy duty pattern matching,
+ an interface (primitive) to the regex pattern matching package should be
+ added."
+
+ |matchEntry
+ mStart "{ Class: SmallInteger }"
+ mStop "{ Class: SmallInteger }"
+ sStart "{ Class: SmallInteger }"
+ sStop "{ Class: SmallInteger }"
+ mSize "{ Class: SmallInteger }"
+ sSize "{ Class: SmallInteger }"
+ index "{ Class: SmallInteger }"
+ quickCheck matchLast
+ matchSet checkChar included|
+
+ mStart := matchStart.
+ mStop := matchStop.
+ sStart := start.
+ sStop := stop.
+
+ [true] whileTrue:[
+ mSize := mStop - mStart + 1.
+ sSize := sStop - sStart + 1.
+
+ "empty strings match"
+ (mSize == 0) ifTrue:[^ (sSize == 0)].
+
+ matchEntry := matchScanArray at:mStart.
+
+ "/ the most common case first:
+ (sSize ~~ 0
+ and:[(checkChar := (aString at:sStart)) == matchEntry]) ifTrue:[
+ "advance by one and continue"
+ mStart := mStart + 1.
+ sStart := sStart + 1
+ ] ifFalse:[
+ (matchEntry == #any) ifTrue:[
+ "restString empty -> no match"
+ (sSize == 0) ifTrue:[^ false].
+ "# matches single character"
+ ((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
+ "advance by one and continue"
+ mStart := mStart + 1.
+ sStart := sStart + 1
+ ] ifFalse:[
+ (matchEntry == #anyString) ifTrue:[
+ "* alone matches anything"
+ (mSize == 1) ifTrue:[^ true].
+ "restString empty & matchString not empty -> no match"
+ (sSize == 0) ifTrue:[^ false].
+
+ "
+ try to avoid some of the recursion by checking last
+ character and continue with shortened strings if possible
+ "
+ quickCheck := false.
+ (mStop >= mStart) ifTrue:[
+ matchLast := matchScanArray at:mStop.
+ (matchLast ~~ #anyString) ifTrue:[
+ (matchLast == #any) ifTrue:[
+ quickCheck := true
+ ] ifFalse:[
+ matchLast == (aString at:sStop) ifTrue:[
+ quickCheck := true
+ ] ifFalse:[
+ matchLast isString ifTrue:[
+ quickCheck := matchLast includes:(aString at:sStop)
+ ]
+ ]
+ ]
+ ]
+ ].
+ quickCheck ifFalse:[
+ "
+ no quick check possible;
+ loop over all possible substrings
+ "
+ index := sStart.
+ [index <= sStop] whileTrue:[
+ (self matchScan:matchScanArray
+ from:(mStart + 1)
+ to:mStop
+ with:aString
+ from:index
+ to:stop
+ ignoreCase:ignoreCase) ifTrue:[
+ ^ true
+ ].
+ index := index + 1
+ ].
+ ^ false
+ ].
+ "
+ quickCheck ok, advance from the right
+ "
+ mStop := mStop - 1.
+ sStop := sStop - 1
+ ] ifFalse:[
+ (matchEntry isString) ifTrue:[
+ "testString empty -> no match"
+ (sSize == 0) ifTrue:[^ false].
+
+ included := false.
+ "/ checkChar := aString at:sStart.
+ included := matchEntry includes:checkChar.
+ included ifFalse:[
+ ignoreCase ifTrue:[
+ checkChar isUppercase ifTrue:[
+ included := matchEntry includes:checkChar asLowercase.
+ ] ifFalse:[
+ included := matchEntry includes:checkChar asUppercase.
+ ]
+ ].
+ ].
+ mStart := mStart + 1.
+ mSize := mSize - 1.
+ included ifFalse:[^ false].
+
+ ((sSize == 1) and:[mSize == 0]) ifTrue:[^ true].
+ "cut off 1st char and continue"
+ sStart := sStart + 1
+ ] ifFalse:[
+ "/ must be single character
+
+ "testString empty ?"
+ (sSize == 0) ifTrue:[^ false].
+
+ "first characters equal ?"
+ "/ checkChar := aString at:sStart.
+ ignoreCase ifFalse:[^ false].
+ (checkChar asUppercase ~~ matchEntry asUppercase) ifTrue:[^ false].
+
+ "advance and continue"
+ mStart := mStart + 1.
+ sStart := sStart + 1
+ ]
+ ]
+ ]
+ ]
+ ]
! !
!CharacterArray methodsFor:'testing'!
@@ -1132,6 +1612,13 @@
made to get aString from the receiver.
See IEEE transactions on Computers 1976 Pg 172 ff."
+ "
+ in the following, we assum that ommiting a character
+ is less of an error than inserting an extra character.
+ Therefore the different insertion (i) and deletion (d)
+ values.
+ "
+
^ self levenshteinTo:aString s:4 c:1 i:2 d:6
"
@@ -1139,6 +1626,7 @@
'computer' levenshteinTo:'computer'
'ocmputer' levenshteinTo:'computer'
'cmputer' levenshteinTo:'computer'
+ 'computer' levenshteinTo:'cmputer'
'Computer' levenshteinTo:'computer'
"
!
@@ -1338,144 +1826,6 @@
^ self , string1 , string2 , string3
! !
-!CharacterArray methodsFor:'filling and replacing'!
-
-withCRs
- "return a new string consisting of receivers characters
- with all \-characters replaced by cr-characters."
-
- ^ self copy replaceAll:$\ by:(Character cr)
-!
-
-withoutCRs
- "return a new collection consisting of receivers elements
- with all cr-characters replaced by \-characters."
-
- ^ self copy replaceAll:(Character cr) by:$\
-!
-
-withoutSpaces
- "return a copy of myself without leading and trailing spaces.
- Notice: this does NOT remove tabs, newline or any other whitespace.
- Use withoutSeparators for this."
-
- |startIndex "{ Class: SmallInteger }"
- endIndex "{ Class: SmallInteger }"
- sz|
-
- sz := self size.
- startIndex := 1.
- endIndex := sz.
-
- [(startIndex < endIndex) and:[(self at:startIndex) == Character space]] whileTrue:[
- startIndex := startIndex + 1
- ].
- [(endIndex > 1) and:[(self at:endIndex) == Character space]] whileTrue:[
- endIndex := endIndex - 1
- ].
- startIndex > endIndex ifTrue:[
- ^ ''
- ].
- ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
- ^ self
- ].
- ^ self copyFrom:startIndex to:endIndex
-
- "
- ' foo ' withoutSpaces
- 'foo ' withoutSpaces
- ' foo' withoutSpaces
- ' ' withoutSpaces
- (' foo' , Character tab asString , ' ') withoutSpaces inspect
- "
-!
-
-withoutSeparators
- "return a copy of myself without leading and trailing whitespace.
- Whitespace is space, tab, newline, formfeed.
- Use withoutSpaces, if you want to remove spaces only."
-
- |startIndex "{ Class: SmallInteger }"
- endIndex "{ Class: SmallInteger }"
- sz|
-
- sz := self size.
- startIndex := 1.
- endIndex := sz.
-
- [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
- startIndex := startIndex + 1
- ].
- [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
- endIndex := endIndex - 1
- ].
- startIndex > endIndex ifTrue:[
- ^ ''
- ].
- ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
- ^ self
- ].
- ^ self copyFrom:startIndex to:endIndex
-
- "
- ' foo ' withoutSeparators
- ' foo' withoutSeparators
- 'foo ' withoutSeparators
- ' ' withoutSeparators
- (' foo' , Character tab asString , ' ') withoutSeparators inspect
- "
-!
-
-withTabs
- "return a copy of the receiver where leading spaces are
- replaced by tabulator characters (assuming 8-col tabs)"
-
- |idx nTabs newString|
-
- idx := self findFirst:[:c | (c ~~ Character space)].
- nTabs := (idx-1) // 8.
- nTabs == 0 ifTrue:[^ self].
-
- "any tabs"
- newString := self class new:(self size - (nTabs * 7)).
- newString atAll:(1 to:nTabs) put:(Character tab).
- newString replaceFrom:(nTabs + 1) with:self startingAt:(nTabs * 8 + 1).
- ^ newString
-
- "
- '12345678901234567890' withTabs
- ' 8901234567890' withTabs
- ' 901234567890' withTabs
- ' 67890' withTabs
- ' 7890' withTabs
- ' 890' withTabs
- "
-!
-
-withTabsExpanded
- "return a copy of the receiver where all tabulator characters
- are expanded into spaces (assuming 8-col tabs)"
-
- |idx str|
-
- (self includes:(Character tab)) ifFalse:[^ self].
- str := WriteStream on:String new.
-
- idx := 1.
- self do:[:ch |
- ch == Character tab ifFalse:[
- str nextPut:ch.
- idx := idx + 1
- ] ifTrue:[
- (idx \\ 8) to:8 do:[:ii |
- str space.
- idx := idx + 1
- ]
- ]
- ].
- ^ str contents
-! !
-
!CharacterArray methodsFor:'queries'!
isString