CharacterArray.st
changeset 9282 f6d9d4129919
parent 9275 556e8b67ad2d
child 9306 de9b438212b8
--- a/CharacterArray.st	Wed Mar 15 11:23:43 2006 +0100
+++ b/CharacterArray.st	Wed Mar 15 11:33:32 2006 +0100
@@ -500,71 +500,94 @@
      This is processed faster (especially with character ranges), and
      can also be reused later. (if the same pattern is to be searched again)"
 
+    ^ self matchScanArrayFrom:aString escapeCharacter:(self matchEscapeCharacter)
+
+    "
+     String matchScanArrayFrom:'*ute*'
+     String matchScanArrayFrom:'**ute**'
+     String matchScanArrayFrom:'*uter'
+     String matchScanArrayFrom:'\*uter'
+     String matchScanArrayFrom:'[cC]#mpute[rR]'
+     String matchScanArrayFrom:'[abcd]*'
+     String matchScanArrayFrom:'[a-k]*'
+     String matchScanArrayFrom:'*some*compl*ern*'
+     String matchScanArrayFrom:'[a-'
+     String matchScanArrayFrom:'[a-zA-Z]'
+     String matchScanArrayFrom:'[a-z01234A-Z]'
+    "
+
+    "Modified: 2.4.1997 / 16:20:29 / cg"
+!
+
+matchScanArrayFrom:aString escapeCharacter:escape
+    "scan a pattern string and decompose it into a scanArray.
+     This is processed faster (especially with character ranges), and
+     can also be reused later. (if the same pattern is to be searched again)"
+
     |coll
      idx "{ Class: SmallInteger }"
-     end c1 c2 matchSet previous escape|
+     end c1 c2 matchSet previous|
 
     previous := nil.
-    escape := self matchEscapeCharacter.
 
     coll := OrderedCollection new.
     idx := 1. end := aString size.
     [idx <= end] whileTrue:[
-	|char this|
-
-	char := aString at:idx.
-	char == $* ifTrue:[
-	    previous ~~ #anyString ifTrue:[
-		this := #anyString
-	    ]
-	] ifFalse:[
-	    char == $# ifTrue:[
-		previous ~~ #anyString ifTrue:[
-		    this := #any
-		]
-	    ] ifFalse:[
-		char == $[ ifTrue:[
-		    matchSet := IdentitySet new.
-		    idx := idx + 1.
-		    idx > end ifTrue:[^ nil].
-		    char := aString at:idx.
-		    c1 := nil.
-		    [char ~~ $]] whileTrue:[
-			((char == $-) and:[c1 notNil]) ifTrue:[
-			    idx := idx + 1.
-			    idx > end ifTrue:[^ nil].
-			    c2 := aString at:idx.
-			    c1 to:c2 do:[:c | matchSet add:c].
-			    c1 := nil.
-			    idx := idx + 1.
-			] ifFalse:[
-			    (char ~~ $]) ifTrue:[
-				matchSet add:char.
-				c1 := char.
-				idx := idx + 1
-			    ]
-			].
-			idx > end ifTrue:[^ nil].
-			char := aString at:idx
-		    ].
-		    this := matchSet asString
-		] ifFalse:[
-		    char == escape ifTrue:[
-			idx := idx + 1.
-			idx > end ifTrue:[
-			    "/ mhmh - what should we do here ?
-			    this := char
-			] ifFalse:[
-			    this := aString at:idx.
-			]
-		    ] ifFalse:[
-			this := char
-		    ]
-		]
-	    ]
-	].
-	this notNil ifTrue:[coll add:this. previous := this].
-	idx := idx + 1
+        |char this|
+
+        char := aString at:idx.
+        char == $* ifTrue:[
+            previous ~~ #anyString ifTrue:[
+                this := #anyString
+            ]
+        ] ifFalse:[
+            char == $# ifTrue:[
+                previous ~~ #anyString ifTrue:[
+                    this := #any
+                ]
+            ] ifFalse:[
+                char == $[ ifTrue:[
+                    matchSet := IdentitySet new.
+                    idx := idx + 1.
+                    idx > end ifTrue:[^ nil].
+                    char := aString at:idx.
+                    c1 := nil.
+                    [char ~~ $]] whileTrue:[
+                        ((char == $-) and:[c1 notNil]) ifTrue:[
+                            idx := idx + 1.
+                            idx > end ifTrue:[^ nil].
+                            c2 := aString at:idx.
+                            c1 to:c2 do:[:c | matchSet add:c].
+                            c1 := nil.
+                            idx := idx + 1.
+                        ] ifFalse:[
+                            (char ~~ $]) ifTrue:[
+                                matchSet add:char.
+                                c1 := char.
+                                idx := idx + 1
+                            ]
+                        ].
+                        idx > end ifTrue:[^ nil].
+                        char := aString at:idx
+                    ].
+                    this := matchSet asString
+                ] ifFalse:[
+                    char == escape ifTrue:[
+                        idx := idx + 1.
+                        idx > end ifTrue:[
+                            "/ mhmh - what should we do here ?
+                            this := char
+                        ] ifFalse:[
+                            this := aString at:idx.
+                        ]
+                    ] ifFalse:[
+                        this := char
+                    ]
+                ]
+            ]
+        ].
+        this notNil ifTrue:[coll add:this. previous := this].
+        idx := idx + 1
     ].
 
     ^ coll asArray
@@ -667,17 +690,6 @@
 
 !CharacterArray methodsFor:'Compatibility-ST/V'!
 
-asArraySeparatedBy:sepChar
-    "return an array of substrings separated by sepChar.
-     This is an ST/V compatibility method."
-
-    ^ self asCollectionOfSubCollectionsSeparatedBy:sepChar
-
-    "
-     '    spaces at beginning' asArraySeparatedBy:Character space
-    "
-!
-
 byteAt:index put:aByte
     "store a byte at given index.
      This is an ST/V compatibility method."
@@ -3170,7 +3182,9 @@
     |matchers|
 
     matchers := self asCollectionOfSubstringsSeparatedBy:$;.
-    ^ matchers contains:[:aPattern | (aPattern match:aString ignoreCase:ignoreCase)].
+    ^ matchers contains:[:aPattern | 
+        aPattern match:aString ignoreCase:ignoreCase escapeCharacter:nil
+      ].
 
 "/    matchers do:[:aPattern |
 "/        (aPattern match:aString ignoreCase:ignoreCase) ifTrue:[^ true].
@@ -3315,8 +3329,9 @@
      characters $* (to match any string) or $# (to match any character).
      or [...] to match a set of characters.
      Lower/uppercase are considered different.
+     The escape character is the one answered by the class-side matchEscapeCharacter.
      NOTICE: match-meta character interpretation is like in unix-matching,
-	     NOT the ST-80 meaning."
+             NOT the ST-80 meaning."
 
     ^ self match:aString from:1 to:aString size ignoreCase:false
 
@@ -3339,14 +3354,52 @@
     "Modified: / 9.6.1998 / 18:50:00 / cg"
 !
 
+match:aString escapeCharacter:escape
+    "return true if aString matches self, where self may contain meta-match
+     characters $* (to match any string) or $# (to match any character),
+     or [...] to match a set of characters.
+     Lower/uppercase are considered different; escape is the escape character (nil for none).
+     NOTICE: match-meta character interpretation is like in unix-matching,
+             NOT the ST-80 meaning."
+
+    ^ self match:aString from:1 to:aString size ignoreCase:false escapeCharacter:escape
+
+    "
+     'a\b\c\*' match:'a\b\c\d'
+     'a\b\c\*' match:'a\b\c\d' escapeCharacter:nil
+    "
+!
+
 match:aString from:start to:stop ignoreCase:ignoreCase
     "return true if part of aString matches myself,
      where self may contain meta-match
      characters $* (to match any string) or $# (to match any character)
      or [...] to match a set of characters.
      If ignoreCase is true, lower/uppercase are considered the same.
+     The escape character is the one answered by the class-side matchEscapeCharacter.
      NOTICE: match-meta character interpretation is like in unix-matching,
-	     NOT the ST-80 meaning."
+             NOT the ST-80 meaning."
+
+    ^ self 
+        match:aString from:start to:stop ignoreCase:ignoreCase
+        escapeCharacter:(self class matchEscapeCharacter)
+
+    "
+     '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true
+     '*ute*' match:'12345COMPUTER' from:6 to:13 ignoreCase:true
+    "
+
+    "Modified: / 10.11.1998 / 21:43:46 / cg"
+!
+
+match:aString from:start to:stop ignoreCase:ignoreCase escapeCharacter:escape
+    "return true if part of aString matches myself,
+     where self may contain meta-match
+     characters $* (to match any string) or $# (to match any character)
+     or [...] to match a set of characters.
+     If ignoreCase is true, lower/uppercase are considered the same.
+     NOTICE: match-meta character interpretation is like in unix-matching,
+             NOT the ST-80 meaning."
 
     |matchScanArray|
 
@@ -3357,23 +3410,23 @@
     "
     (PreviousMatch notNil
     and:[PreviousMatch key = self]) ifTrue:[
-	matchScanArray := PreviousMatch value
+        matchScanArray := PreviousMatch value
     ] ifFalse:[
-	matchScanArray := self class matchScanArrayFrom:self.
-	matchScanArray isNil ifTrue:[
-	    'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
-	    ^ self = aString
+        matchScanArray := self class matchScanArrayFrom:self escapeCharacter:escape.
+        matchScanArray isNil ifTrue:[
+            'CharacterArray [info]: invalid matchpattern:''' infoPrint. self infoPrint. ''' comparing for equality.' infoPrintCR.
+            ^ self = aString
 "/            ^ false
-	].
-	PreviousMatch := self -> matchScanArray.
+        ].
+        PreviousMatch := self -> matchScanArray.
     ].
 
     ^ self class
-	matchScan:matchScanArray
-	from:1 to:matchScanArray size
-	with:aString
-	from:start to:stop
-	ignoreCase:ignoreCase
+        matchScan:matchScanArray
+        from:1 to:matchScanArray size
+        with:aString
+        from:start to:stop
+        ignoreCase:ignoreCase
 
     "
      '*ute*' match:'12345COMPUTER' from:1 to:5 ignoreCase:true
@@ -3388,8 +3441,9 @@
      characters $* (to match any string) or $# (to match any character)
      or [...] to match a set of characters.
      If ignoreCase is true, lower/uppercase are considered the same.
+     The escape character is the one answered by the class-side matchEscapeCharacter.
      NOTICE: match-meta character interpretation is like in unix-matching,
-	     NOT the ST-80 meaning."
+             NOT the ST-80 meaning."
 
     ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase
 
@@ -3406,14 +3460,51 @@
      '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true
 
      Time millisecondsToRun:[
-	Symbol allInstancesDo:[:sym |
-	    '[ab]*' match:sym ignoreCase:false
-	]
+        Symbol allInstancesDo:[:sym |
+            '[ab]*' match:sym ignoreCase:false
+        ]
      ].
      Time millisecondsToRun:[
-	Symbol allInstancesDo:[:sym |
-	    '*at:*' match:sym ignoreCase:false
-	]
+        Symbol allInstancesDo:[:sym |
+            '*at:*' match:sym ignoreCase:false
+        ]
+     ].
+    "
+
+    "Modified: 2.4.1997 / 17:28:58 / cg"
+!
+
+match:aString ignoreCase:ignoreCase escapeCharacter:escape
+    "return true if aString matches self, where self may contain meta-match
+     characters $* (to match any string) or $# (to match any character)
+     or [...] to match a set of characters.
+     If ignoreCase is true, lower/uppercase are considered the same.
+     NOTICE: match-meta character interpretation is like in unix-matching,
+             NOT the ST-80 meaning."
+
+    ^ self match:aString from:1 to:aString size ignoreCase:ignoreCase escapeCharacter:escape
+
+    "
+     '*ute*' match:'COMPUTER' ignoreCase:true
+     '*uter' match:'COMPUTER' ignoreCase:false
+     '[abcd]*' match:'computer' ignoreCase:false
+     '[abcd]*' match:'Computer' ignoreCase:false
+     '[a-k]*' match:'komputer' ignoreCase:false
+     '[a-k]*' match:'zomputer' ignoreCase:false
+     '[a-k]*' match:'Komputer' ignoreCase:false
+     '[a-k]*' match:'Komputer' ignoreCase:true
+     '*some*compl*ern*' match:'this is some more complicated pattern match' ignoreCase:true
+     '*some*compl*ern*' match:'this is another complicated pattern match' ignoreCase:true
+
+     Time millisecondsToRun:[
+        Symbol allInstancesDo:[:sym |
+            '[ab]*' match:sym ignoreCase:false
+        ]
+     ].
+     Time millisecondsToRun:[
+        Symbol allInstancesDo:[:sym |
+            '*at:*' match:sym ignoreCase:false
+        ]
      ].
     "
 
@@ -5181,7 +5272,7 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.339 2006-03-14 13:13:44 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.340 2006-03-15 10:33:32 cg Exp $'
 ! !
 
 CharacterArray initialize!