--- a/CharacterArray.st Wed Mar 15 11:23:43 2006 +0100
+++ b/CharacterArray.st Wed Mar 15 11:33:32 2006 +0100
@@ -500,71 +500,94 @@
This is processed faster (especially with character ranges), and
can also be reused later. (if the same pattern is to be searched again)"
+ ^ self matchScanArrayFrom:aString escapeCharacter:(self matchEscapeCharacter)
+
+ "
+ String matchScanArrayFrom:'*ute*'
+ String matchScanArrayFrom:'**ute**'
+ String matchScanArrayFrom:'*uter'
+ String matchScanArrayFrom:'\*uter'
+ String matchScanArrayFrom:'[cC]#mpute[rR]'
+ String matchScanArrayFrom:'[abcd]*'
+ String matchScanArrayFrom:'[a-k]*'
+ String matchScanArrayFrom:'*some*compl*ern*'
+ String matchScanArrayFrom:'[a-'
+ String matchScanArrayFrom:'[a-zA-Z]'
+ String matchScanArrayFrom:'[a-z01234A-Z]'
+ "
+
+ "Modified: 2.4.1997 / 16:20:29 / cg"
+!
+
+matchScanArrayFrom:aString escapeCharacter:escape
+ "scan a pattern string and decompose it into a scanArray.
+ This is processed faster (especially with character ranges), and
+ can also be reused later. (if the same pattern is to be searched again)"
+
|coll
idx "{ Class: SmallInteger }"
- end c1 c2 matchSet previous escape|
+ end c1 c2 matchSet previous|
previous := nil.
- escape := self matchEscapeCharacter.
coll := OrderedCollection new.
idx := 1. end := aString size.
[idx <= end] whileTrue:[
- |char this|
-
- char := aString at:idx.
- char == $* ifTrue:[
- previous ~~ #anyString ifTrue:[
- this := #anyString
- ]
- ] ifFalse:[
- char == $# ifTrue:[
- previous ~~ #anyString ifTrue:[
- this := #any
- ]
- ] ifFalse:[
- char == $[ ifTrue:[
- matchSet := IdentitySet new.
- idx := idx + 1.
- idx > end ifTrue:[^ nil].
- char := aString at:idx.
- c1 := nil.
- [char ~~ $]] whileTrue:[
- ((char == $-) and:[c1 notNil]) ifTrue:[
- idx := idx + 1.
- idx > end ifTrue:[^ nil].
- c2 := aString at:idx.
- c1 to:c2 do:[:c | matchSet add:c].
- c1 := nil.
- idx := idx + 1.
- ] ifFalse:[
- (char ~~ $]) ifTrue:[
- matchSet add:char.
- c1 := char.
- idx := idx + 1
- ]
- ].
- idx > end ifTrue:[^ nil].
- char := aString at:idx
- ].
- this := matchSet asString
- ] ifFalse:[
- char == escape ifTrue:[
- idx := idx + 1.
- idx > end ifTrue:[
- "/ mhmh - what should we do here ?
- this := char
- ] ifFalse:[
- this := aString at:idx.
- ]
- ] ifFalse:[
- this := char
- ]
- ]
- ]
- ].
- this notNil ifTrue:[coll add:this. previous := this].
- idx := idx + 1
+ |char this|
+
+ char := aString at:idx.
+ char == $* ifTrue:[
+ previous ~~ #anyString ifTrue:[
+ this := #anyString
+ ]
+ ] ifFalse:[
+ char == $# ifTrue:[
+ previous ~~ #anyString ifTrue:[
+ this := #any
+ ]
+ ] ifFalse:[
+ char == $[ ifTrue:[
+ matchSet := IdentitySet new.
+ idx := idx + 1.
+ idx > end ifTrue:[^ nil].
+ char := aString at:idx.
+ c1 := nil.
+ [char ~~ $]] whileTrue:[
+ ((char == $-) and:[c1 notNil]) ifTrue:[
+ idx := idx + 1.
+ idx > end ifTrue:[^ nil].
+ c2 := aString at:idx.
+ c1 to:c2 do:[:c | matchSet add:c].
+ c1 := nil.
+ idx := idx + 1.
+ ] ifFalse:[
+ (char ~~ $]) ifTrue:[
+ matchSet add:char.
+ c1 := char.
+ idx := idx + 1
+ ]
+ ].
+ idx > end ifTrue:[^ nil].
+ char := aString at:idx
+ ].
+ this := matchSet asString
+ ] ifFalse:[
+ char == escape ifTrue:[
+ idx := idx + 1.
+ idx > end ifTrue:[
+ "/ mhmh - what should we do here ?
+ this := char
+ ] ifFalse:[
+ this := aString at:idx.
+ ]
+ ] ifFalse:[
+ this := char
+ ]
+ ]
+ ]
+ ].
+ this notNil ifTrue:[coll add:this. previous := this].
+ idx := idx + 1
].
^ coll asArray
@@ -667,17 +690,6 @@
!CharacterArray methodsFor:'Compatibility-ST/V'!
-asArraySeparatedBy:sepChar
- "return an array of substrings separated by sepChar.
- This is an ST/V compatibility method."
-
- ^ self asCollectionOfSubCollectionsSeparatedBy:sepChar
-
- "
- ' spaces at beginning' asArraySeparatedBy:Character space
- "
-!
-
byteAt:index put:aByte
"store a byte at given index.
This is an ST/V compatibility method."
@@ -3170,7 +3182,9 @@
|matchers|
matchers := self asCollectionOfSubstringsSeparatedBy:$;.
- ^ matchers contains:[:aPattern | (aPattern match:aString ignoreCase:ignoreCase)].
+ ^ matchers contains:[:aPattern |
+ aPattern match:aString ignoreCase:ignoreCase escapeCharacter:nil
+ ].
"/ matchers do:[:aPattern |
"/ (aPattern match:aString ignoreCase:ignoreCase) ifTrue:[^ true].
@@ -3315,8 +3329,9 @@
characters $* (to match any string) or $# (to match any character).
or [...] to match a set of characters.
Lower/uppercase are considered different.
+ The escape character is the class's matchEscapeCharacter (normally the backslash).
NOTICE: match-meta character interpretation is like in unix-matching,
- NOT the ST-80 meaning."
+ NOT the ST-80 meaning."
^ self match:aString from:1 to:aString size ignoreCase:false
@@ -3339,14 +3354,52 @@
"Modified: / 9.6.1998 / 18:50:00 / cg"
!
+match:aString escapeCharacter:escape
+ "return true if aString matches self, where self may contain meta-match
+ characters $* (to match any string) or $# (to match any character).
+ or [...] to match a set of characters.
+ Lower/uppercase are considered different; escape quotes the next pattern character (nil: no escaping).
+ NOTICE: match-meta character interpretation is like in unix-matching,
+ NOT the ST-80 meaning."
+
+ ^ self match:aString from:1 to:aString size ignoreCase:false escapeCharacter:escape
+
+ "
+ 'a\b\c\*' match:'a\b\c\d'
+ 'a\b\c\*' match:'a\b\c\d' escapeCharacter:nil
+ "
+!
+
match:aString from:start to:stop ignoreCase:ignoreCase
"return true if part of aString matches myself,
where self may contain meta-match
characters $* (to match any string) or $# (to match any character)
or [...] to match a set of characters.
If ignoreCase is true, lower/uppercase are considered the same.
+ The escape character is the class's matchEscapeCharacter (normally the backslash).
NOTICE: match-meta character interpretation is like in unix-matching,
- NOT the ST-80 meaning."
+ NOT the ST-80 meaning."
+
+ ^ self
+ match:aString from:start to:stop ignoreCase:ignoreCase
+ escapeCharacter:(self class matchEscapeCharacter)
+
+ "
+ '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true
+ '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true
+ "
+
+ "Modified: / 10.11.1998 / 21:43:46 / cg"
+!
+
+match:aString from:start to:stop ignoreCase:ignoreCase escapeCharacter:escape
+ "return true if part of aString matches myself,
+ where self may contain meta-match
+ characters $* (to match any string) or $# (to match any character)
+ or [...] to match a set of characters.
+ If ignoreCase is true, lower/uppercase are considered the same; escape quotes the next pattern character (nil: no escaping).
+ NOTICE: match-meta character interpretation is like in unix-matching,
+ NOT the ST-80 meaning."
|matchScanArray|
@@ -3357,23 +3410,23 @@
"
(PreviousMatch notNil
and:[PreviousMatch key = self]) ifTrue:[
- matchScanArray := PreviousMatch value
+ matchScanArray := PreviousMatch value
] ifFalse:[
- matchScanArray := self class matchScanArrayFrom:self.
- matchScanArray isNil ifTrue:[
- 'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
- ^ self = aString
+ matchScanArray := self class matchScanArrayFrom:self escapeCharacter:escape.
+ matchScanArray isNil ifTrue:[
+ 'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
+ ^ self = aString
"/ ^ false
- ].
- PreviousMatch := self -> matchScanArray.
+ ].
+ PreviousMatch := self -> matchScanArray.
].
^ self class
- matchScan:matchScanArray
- from:1 to:matchScanArray size
- with:aString
- from:start to:stop
- ignoreCase:ignoreCase
+ matchScan:matchScanArray
+ from:1 to:matchScanArray size
+ with:aString
+ from:start to:stop
+ ignoreCase:ignoreCase
"
'*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true
@@ -3388,8 +3441,9 @@
characters $* (to match any string) or $# (to match any character)
or [...] to match a set of characters.
If ignoreCase is true, lower/uppercase are considered the same.
+ The escape character is the class's matchEscapeCharacter (normally the backslash).
NOTICE: match-meta character interpretation is like in unix-matching,
- NOT the ST-80 meaning."
+ NOT the ST-80 meaning."
^ self match:aString from:1 to:aString size ignoreCase:ignoreCase
@@ -3406,14 +3460,51 @@
'*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true
Time millisecondsToRun:[
- Symbol allInstancesDo:[:sym |
- '[ab]*' match:sym ignoreCase:false
- ]
+ Symbol allInstancesDo:[:sym |
+ '[ab]*' match:sym ignoreCase:false
+ ]
].
Time millisecondsToRun:[
- Symbol allInstancesDo:[:sym |
- '*at:*' match:sym ignoreCase:false
- ]
+ Symbol allInstancesDo:[:sym |
+ '*at:*' match:sym ignoreCase:false
+ ]
+ ].
+ "
+
+ "Modified: 2.4.1997 / 17:28:58 / cg"
+!
+
+match:aString ignoreCase:ignoreCase escapeCharacter:escape
+ "return true if aString matches self, where self may contain meta-match
+ characters $* (to match any string) or $# (to match any character)
+ or [...] to match a set of characters.
+ If ignoreCase is true, lower/uppercase are considered the same; escape quotes the next pattern character (nil: no escaping).
+ NOTICE: match-meta character interpretation is like in unix-matching,
+ NOT the ST-80 meaning."
+
+ ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase escapeCharacter:escape
+
+ "
+ '*ute*' match:'COMPUTER' ignoreCase:true
+ '*uter' match:'COMPUTER' ignoreCase:false
+ '[abcd]*' match:'computer' ignoreCase:false
+ '[abcd]*' match:'Computer' ignoreCase:false
+ '[a-k]*' match:'komputer' ignoreCase:false
+ '[a-k]*' match:'zomputer' ignoreCase:false
+ '[a-k]*' match:'Komputer' ignoreCase:false
+ '[a-k]*' match:'Komputer' ignoreCase:true
+ '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true
+ '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true
+
+ Time millisecondsToRun:[
+ Symbol allInstancesDo:[:sym |
+ '[ab]*' match:sym ignoreCase:false
+ ]
+ ].
+ Time millisecondsToRun:[
+ Symbol allInstancesDo:[:sym |
+ '*at:*' match:sym ignoreCase:false
+ ]
].
"
@@ -5181,7 +5272,7 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.339 2006-03-14 13:13:44 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.340 2006-03-15 10:33:32 cg Exp $'
! !
CharacterArray initialize!