SmallSense__TokenPatternMatcher.st
author Jan Vrany <jan.vrany@fit.cvut.cz>
Thu, 15 May 2014 16:27:42 +0100
changeset 211 8d5259c39445
parent 203 c70b7351eda6
child 238 d5a32e41181f
permissions -rw-r--r--
Added support for CARET virtual token generated at cursor position (when provided) When a cursor position is provided, token stream return a special token with type #CARET is returned. This token can be then used and matched in token patterns.

"{ Package: 'jv:smallsense' }"

"{ NameSpace: SmallSense }"

Regex::RxMatcher subclass:#TokenPatternMatcher
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'SmallSense-Utils-Matcher'
!

!TokenPatternMatcher class methodsFor:'documentation'!

documentation
"
    A custom regex-like matcher to match token streams.

    [author:]
        Jan Vrany <jan.vrany@fit.cvut.cz>

    [instance variables:]

    [class variables:]

    [see also:]

"
! !

!TokenPatternMatcher methodsFor:'accessing'!

subexpression: subIndex
    "returns the matches for a parenthized subexpression.
     notice that non-matching subexpressions deliver an empty matchString;
     also be careful with nested parnethesis.
     With index==1, you get the whole matchString"

    | originalPosition start end reply |

    originalPosition := self position.
    start := self subBeginning: subIndex.
    end := self subEnd: subIndex.
    (start isNil or: [end isNil]) ifTrue: [^''].
    reply := (Array new: end - start) writeStream.
    self position: start.
    start to: end - 1 do: [:ignored | reply nextPut: stream next].
    self position: originalPosition.
    ^reply contents

    "
     |matcher|

     matcher := Regex::RxMatcher new 
                    initializeFromString:'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[ ]+(:isDigit::isDigit:?)[ ]*,[ ]*19(:isDigit::isDigit:)'
                    ignoreCase:false.
     (matcher matches:'Aug 6, 1996') ifTrue:[
        matcher subexpression:2
     ] ifFalse:[
        self error.
     ].                       
    "

    "Created: / 06-05-2014 / 15:46:37 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!TokenPatternMatcher methodsFor:'double dispatch'!

syntaxToken: tokenNode
    | type value |

    type := tokenNode type.
    value := tokenNode value.

    ^ Regex::RxmPredicate new predicate:
        [:token |
            (token isSymbol or:[token isCharacter]) ifTrue:[ 
                (type == token) and:[ value isNil or:[value == token ] ]
            ] ifFalse:[ 
                (type == token type) and:[ value isNil or:[value = token value]  ]
            ].
        ].

    "Created: / 06-05-2014 / 14:38:32 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 06-05-2014 / 15:59:07 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

syntaxTokenSet: tokenSetNode
    | tokens|

    tokens := tokenSetNode tokens.

    ^ Regex::RxmPredicate new predicate:
        [:token |
            | matches |
            matches := tokens anySatisfy:[:tokenNode |
                | type value |

                type := tokenNode type.
                value := tokenNode value.
                (token isSymbol or:[token isCharacter]) ifTrue:[ 
                    (type = token) and:[ value isNil or:[value == token ] ]
                ] ifFalse:[ 
                    (type = token type) and:[ value isNil or:[value = token value]  ]
                ].
            ].
            tokenSetNode negated ifTrue:[ 
                matches := matches not.
            ].
            matches.
        ].

    "Created: / 09-05-2014 / 16:22:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!TokenPatternMatcher methodsFor:'initialize-release'!

initialize: syntaxTreeRoot ignoreCase: aBoolean
        "Compile myself for the regex with the specified syntax tree.
        See comment and `building' protocol in this class and 
        #dispatchTo: methods in syntax tree components for details 
        on double-dispatch building. 
        The argument is supposedly a RxsRegex."

        ignoreCase := aBoolean.
        self buildFrom: syntaxTreeRoot.
"/        startOptimizer := RxMatchOptimizer new initialize: syntaxTreeRoot ignoreCase: aBoolean

    "Created: / 06-05-2014 / 14:39:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !