SmallSense__TokenExpressionMatcher.st
changeset 200 12d6a2f82d95
parent 199 c2d7421cb317
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SmallSense__TokenExpressionMatcher.st	Tue May 06 17:29:29 2014 +0100
@@ -0,0 +1,102 @@
+"{ Package: 'jv:smallsense' }"
+
+"{ NameSpace: SmallSense }"
+
+Regex::RxMatcher subclass:#TokenExpressionMatcher
+	instanceVariableNames:''
+	classVariableNames:''
+	poolDictionaries:''
+	category:'SmallSense-Utils-Matcher'    "declares no variables of its own; ignoreCase and stream used by its methods are inherited"
+!
+
+!TokenExpressionMatcher class methodsFor:'documentation'!
+
+documentation
+"
+    A custom regex matcher that matches streams of tokens; it extends Regex::RxMatcher.
+
+    [author:]
+        Jan Vrany <jan.vrany@fit.cvut.cz>
+
+    [instance variables:]
+
+    [class variables:]
+
+    [see also:]
+        Regex::RxMatcher
+"
+! !
+
+!TokenExpressionMatcher methodsFor:'accessing'!
+
+subexpression: subIndex
+    "Answer the stream elements matched by the parenthesized subexpression
+     numbered subIndex, collected in an Array; index 1 stands for the whole
+     match. When #subBeginning: or #subEnd: answers nil for subIndex, an empty
+     String is answered instead (NOTE(review): not an empty Array - confirm
+     callers expect this). The matcher position is restored afterwards."
+
+    | savedPosition matchStart matchEnd collected |
+
+    savedPosition := self position.
+    matchStart := self subBeginning: subIndex.
+    matchEnd := self subEnd: subIndex.
+    (matchStart notNil and: [matchEnd notNil]) ifFalse: [^''].
+    collected := (Array new: matchEnd - matchStart) writeStream.
+    self position: matchStart.
+    (matchEnd - matchStart) timesRepeat: [collected nextPut: stream next].
+    self position: savedPosition.
+    ^collected contents
+
+    "
+     Example using the superclass Regex::RxMatcher on a String:
+
+     |matcher|
+     matcher := Regex::RxMatcher new 
+                    initializeFromString:'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[ ]+(:isDigit::isDigit:?)[ ]*,[ ]*19(:isDigit::isDigit:)'
+                    ignoreCase:false.
+     (matcher matches:'Aug 6, 1996')
+        ifTrue:[ matcher subexpression:2 ]
+        ifFalse:[ self error ].
+    "
+
+    "Created: / 06-05-2014 / 15:46:37 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+! !
+
+!TokenExpressionMatcher methodsFor:'double dispatch'!
+
+syntaxToken: tokenNode
+    "Build a Regex::RxmPredicate for tokenNode. Its block accepts a stream element
+     whose type is identical to the node's type and, unless the node's value is nil,
+     whose value equals the node's value. A bare Symbol or Character element is
+     itself compared by identity against the type and the (non-nil) value."
+
+    | wantedType wantedValue |
+    wantedType := tokenNode type.
+    wantedValue := tokenNode value.
+    ^ Regex::RxmPredicate new predicate:[:candidate |
+        (candidate isSymbol or:[candidate isCharacter])
+            ifFalse:[ (wantedType == candidate type) and:[ wantedValue isNil or:[ wantedValue = candidate value ] ] ]
+            ifTrue:[ (wantedType == candidate) and:[ wantedValue isNil or:[ wantedValue == candidate ] ] ]
+    ]
+
+    "Created: / 06-05-2014 / 14:38:32 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 06-05-2014 / 15:59:07 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+! !
+
+!TokenExpressionMatcher methodsFor:'initialize-release'!
+
+initialize: syntaxTreeRoot ignoreCase: aBoolean
+    "Compile the receiver from syntaxTreeRoot (supposedly a RxsRegex): record
+     aBoolean in ignoreCase, then hand the tree to #buildFrom:. See the #dispatchTo:
+     methods of the syntax tree components for the double-dispatch building."
+
+    ignoreCase := aBoolean.
+    self buildFrom: syntaxTreeRoot.
+
+"/ No start optimizer is set up here; the disabled statement was:
+"/        startOptimizer := RxMatchOptimizer new initialize: syntaxTreeRoot ignoreCase: aBoolean
+
+    "Created: / 06-05-2014 / 14:39:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+! !
+