diff -r 9f05718319a0 -r f6d9d4129919 CharacterArray.st --- a/CharacterArray.st Wed Mar 15 11:23:43 2006 +0100 +++ b/CharacterArray.st Wed Mar 15 11:33:32 2006 +0100 @@ -500,71 +500,94 @@ This is processed faster (especially with character ranges), and can also be reused later. (if the same pattern is to be searched again)" + ^ self matchScanArrayFrom:aString escapeCharacter:(self matchEscapeCharacter) + + " + String matchScanArrayFrom:'*ute*' + String matchScanArrayFrom:'**ute**' + String matchScanArrayFrom:'*uter' + String matchScanArrayFrom:'\*uter' + String matchScanArrayFrom:'[cC]#mpute[rR]' + String matchScanArrayFrom:'[abcd]*' + String matchScanArrayFrom:'[a-k]*' + String matchScanArrayFrom:'*some*compl*ern*' + String matchScanArrayFrom:'[a-' + String matchScanArrayFrom:'[a-zA-Z]' + String matchScanArrayFrom:'[a-z01234A-Z]' + " + + "Modified: 2.4.1997 / 16:20:29 / cg" +! + +matchScanArrayFrom:aString escapeCharacter:escape + "scan a pattern string and decompose it into a scanArray. + This is processed faster (especially with character ranges), and + can also be reused later. (if the same pattern is to be searched again)" + |coll idx "{ Class: SmallInteger }" - end c1 c2 matchSet previous escape| + end c1 c2 matchSet previous| previous := nil. - escape := self matchEscapeCharacter. coll := OrderedCollection new. idx := 1. end := aString size. [idx <= end] whileTrue:[ - |char this| - - char := aString at:idx. - char == $* ifTrue:[ - previous ~~ #anyString ifTrue:[ - this := #anyString - ] - ] ifFalse:[ - char == $# ifTrue:[ - previous ~~ #anyString ifTrue:[ - this := #any - ] - ] ifFalse:[ - char == $[ ifTrue:[ - matchSet := IdentitySet new. - idx := idx + 1. - idx > end ifTrue:[^ nil]. - char := aString at:idx. - c1 := nil. - [char ~~ $]] whileTrue:[ - ((char == $-) and:[c1 notNil]) ifTrue:[ - idx := idx + 1. - idx > end ifTrue:[^ nil]. - c2 := aString at:idx. - c1 to:c2 do:[:c | matchSet add:c]. - c1 := nil. - idx := idx + 1. 
- ] ifFalse:[ - (char ~~ $]) ifTrue:[ - matchSet add:char. - c1 := char. - idx := idx + 1 - ] - ]. - idx > end ifTrue:[^ nil]. - char := aString at:idx - ]. - this := matchSet asString - ] ifFalse:[ - char == escape ifTrue:[ - idx := idx + 1. - idx > end ifTrue:[ - "/ mhmh - what should we do here ? - this := char - ] ifFalse:[ - this := aString at:idx. - ] - ] ifFalse:[ - this := char - ] - ] - ] - ]. - this notNil ifTrue:[coll add:this. previous := this]. - idx := idx + 1 + |char this| + + char := aString at:idx. + char == $* ifTrue:[ + previous ~~ #anyString ifTrue:[ + this := #anyString + ] + ] ifFalse:[ + char == $# ifTrue:[ + previous ~~ #anyString ifTrue:[ + this := #any + ] + ] ifFalse:[ + char == $[ ifTrue:[ + matchSet := IdentitySet new. + idx := idx + 1. + idx > end ifTrue:[^ nil]. + char := aString at:idx. + c1 := nil. + [char ~~ $]] whileTrue:[ + ((char == $-) and:[c1 notNil]) ifTrue:[ + idx := idx + 1. + idx > end ifTrue:[^ nil]. + c2 := aString at:idx. + c1 to:c2 do:[:c | matchSet add:c]. + c1 := nil. + idx := idx + 1. + ] ifFalse:[ + (char ~~ $]) ifTrue:[ + matchSet add:char. + c1 := char. + idx := idx + 1 + ] + ]. + idx > end ifTrue:[^ nil]. + char := aString at:idx + ]. + this := matchSet asString + ] ifFalse:[ + char == escape ifTrue:[ + idx := idx + 1. + idx > end ifTrue:[ + "/ mhmh - what should we do here ? + this := char + ] ifFalse:[ + this := aString at:idx. + ] + ] ifFalse:[ + this := char + ] + ] + ] + ]. + this notNil ifTrue:[coll add:this. previous := this]. + idx := idx + 1 ]. ^ coll asArray @@ -667,17 +690,6 @@ !CharacterArray methodsFor:'Compatibility-ST/V'! -asArraySeparatedBy:sepChar - "return an array of substrings separated by sepChar. - This is an ST/V compatibility method." - - ^ self asCollectionOfSubCollectionsSeparatedBy:sepChar - - " - ' spaces at beginning' asArraySeparatedBy:Character space - " -! - byteAt:index put:aByte "store a byte at given index. This is an ST/V compatibility method." 
@@ -3170,7 +3182,9 @@ |matchers| matchers := self asCollectionOfSubstringsSeparatedBy:$;. - ^ matchers contains:[:aPattern | (aPattern match:aString ignoreCase:ignoreCase)]. + ^ matchers contains:[:aPattern | + aPattern match:aString ignoreCase:ignoreCase escapeCharacter:nil + ]. "/ matchers do:[:aPattern | "/ (aPattern match:aString ignoreCase:ignoreCase) ifTrue:[^ true]. @@ -3315,8 +3329,9 @@ characters $* (to match any string) or $# (to match any character). or [...] to match a set of characters. Lower/uppercase are considered different. + The escape character is the one answered by the class-side matchEscapeCharacter (presumably the backslash - confirm). NOTICE: match-meta character interpretation is like in unix-matching, - NOT the ST-80 meaning." + NOT the ST-80 meaning." ^ self match:aString from:1 to:aString size ignoreCase:false @@ -3339,14 +3354,52 @@ "Modified: / 9.6.1998 / 18:50:00 / cg" ! +match:aString escapeCharacter:escape + "return true if aString matches self, where self may contain meta-match + characters $* (to match any string) or $# (to match any character). + or [...] to match a set of characters. + Lower/uppercase are considered different. + NOTICE: match-meta character interpretation is like in unix-matching, + NOT the ST-80 meaning." + + ^ self match:aString from:1 to:aString size ignoreCase:false escapeCharacter:escape + + " + 'a\b\c\*' match:'a\b\c\d' + 'a\b\c\*' match:'a\b\c\d' escapeCharacter:nil + " +! + match:aString from:start to:stop ignoreCase:ignoreCase "return true if part of aString matches myself, where self may contain meta-match characters $* (to match any string) or $# (to match any character) or [...] to match a set of characters. If ignoreCase is true, lower/uppercase are considered the same. + The escape character is the one answered by the class-side matchEscapeCharacter (presumably the backslash - confirm). NOTICE: match-meta character interpretation is like in unix-matching, - NOT the ST-80 meaning." + NOT the ST-80 meaning." 
+ + ^ self + match:aString from:start to:stop ignoreCase:ignoreCase + escapeCharacter:(self class matchEscapeCharacter) + + " + '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true + '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true + " + + "Modified: / 10.11.1998 / 21:43:46 / cg" +! + +match:aString from:start to:stop ignoreCase:ignoreCase escapeCharacter:escape + "return true if part of aString matches myself, + where self may contain meta-match + characters $* (to match any string) or $# (to match any character) + or [...] to match a set of characters. + If ignoreCase is true, lower/uppercase are considered the same. The escape argument is the escape character handed to matchScanArrayFrom:escapeCharacter: when the pattern is scanned. NOTE(review): the PreviousMatch cache is keyed on the pattern only, so a cached scanArray may have been built with a different escape character - confirm. + NOTICE: match-meta character interpretation is like in unix-matching, + NOT the ST-80 meaning." |matchScanArray| @@ -3357,23 +3410,23 @@ " (PreviousMatch notNil and:[PreviousMatch key = self]) ifTrue:[ - matchScanArray := PreviousMatch value + matchScanArray := PreviousMatch value ] ifFalse:[ - matchScanArray := self class matchScanArrayFrom:self. - matchScanArray isNil ifTrue:[ - 'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR. - ^ self = aString + matchScanArray := self class matchScanArrayFrom:self escapeCharacter:escape. + matchScanArray isNil ifTrue:[ + 'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR. + ^ self = aString "/ ^ false - ]. - PreviousMatch := self -> matchScanArray. + ]. + PreviousMatch := self -> matchScanArray. ]. ^ self class - matchScan:matchScanArray - from:1 to:matchScanArray size - with:aString - from:start to:stop - ignoreCase:ignoreCase + matchScan:matchScanArray + from:1 to:matchScanArray size + with:aString + from:start to:stop + ignoreCase:ignoreCase " '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true @@ -3388,8 +3441,9 @@ characters $* (to match any string) or $# (to match any character) or [...] to match a set of characters. 
If ignoreCase is true, lower/uppercase are considered the same. + The escape character is the one answered by the class-side matchEscapeCharacter (presumably the backslash - confirm). NOTICE: match-meta character interpretation is like in unix-matching, - NOT the ST-80 meaning." + NOT the ST-80 meaning." ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase @@ -3406,14 +3460,51 @@ '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true Time millisecondsToRun:[ - Symbol allInstancesDo:[:sym | - '[ab]*' match:sym ignoreCase:false - ] + Symbol allInstancesDo:[:sym | + '[ab]*' match:sym ignoreCase:false + ] ]. Time millisecondsToRun:[ - Symbol allInstancesDo:[:sym | - '*at:*' match:sym ignoreCase:false - ] + Symbol allInstancesDo:[:sym | + '*at:*' match:sym ignoreCase:false + ] + ]. + " + + "Modified: 2.4.1997 / 17:28:58 / cg" +! + +match:aString ignoreCase:ignoreCase escapeCharacter:escape + "return true if aString matches self, where self may contain meta-match + characters $* (to match any string) or $# (to match any character) + or [...] to match a set of characters. + If ignoreCase is true, lower/uppercase are considered the same. + NOTICE: match-meta character interpretation is like in unix-matching, + NOT the ST-80 meaning." + + ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase escapeCharacter:escape + + " + '*ute*' match:'COMPUTER' ignoreCase:true + '*uter' match:'COMPUTER' ignoreCase:false + '[abcd]*' match:'computer' ignoreCase:false + '[abcd]*' match:'Computer' ignoreCase:false + '[a-k]*' match:'komputer' ignoreCase:false + '[a-k]*' match:'zomputer' ignoreCase:false + '[a-k]*' match:'Komputer' ignoreCase:false + '[a-k]*' match:'Komputer' ignoreCase:true + '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true + '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true + + Time millisecondsToRun:[ + Symbol allInstancesDo:[:sym | + '[ab]*' match:sym ignoreCase:false + ] + ]. 
+ Time millisecondsToRun:[ + Symbol allInstancesDo:[:sym | + '*at:*' match:sym ignoreCase:false + ] ]. " @@ -5181,7 +5272,7 @@ !CharacterArray class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.339 2006-03-14 13:13:44 cg Exp $' + ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.340 2006-03-15 10:33:32 cg Exp $' ! ! CharacterArray initialize!