SmallSense__TokenPatternParser.st
changeset 203 c70b7351eda6
parent 202 5c8610dad14c
child 204 190357b490fd
--- a/SmallSense__TokenPatternParser.st	Fri May 09 15:22:12 2014 +0100
+++ b/SmallSense__TokenPatternParser.st	Fri May 09 17:16:42 2014 +0100
@@ -2,13 +2,20 @@
 
 "{ NameSpace: SmallSense }"
 
-Object subclass:#TokenPatternParser
-	instanceVariableNames:'source stream'
+Regex::RxParser subclass:#TokenPatternParser
+	instanceVariableNames:''
 	classVariableNames:''
 	poolDictionaries:''
 	category:'SmallSense-Utils-Matcher'
 !
 
+Regex::RxCharSetParser subclass:#TokenSpecParser
+	instanceVariableNames:''
+	classVariableNames:''
+	poolDictionaries:''
+	privateIn:TokenPatternParser
+!
+
 !TokenPatternParser class methodsFor:'documentation'!
 
 documentation
@@ -35,121 +42,80 @@
     "Created: / 02-05-2014 / 18:56:14 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 ! !
 
-!TokenPatternParser methodsFor:'parsing'!
+!TokenPatternParser methodsFor:'private'!
+
+characterSetFrom: setSpec
+        "<setSpec> is what goes between the brackets in a charset regex
+        (a String). Answer a TokenPatternTokenSet of the elements TokenSpecParser
+        parses from it, negated when it starts with $^. Spec is never empty."
 
-parse: anArrayOrStream
-    source := anArrayOrStream readStream.
-    ^ self parse.
+        | negated spec |
+        spec := ReadStream on: setSpec.
+        spec peek = $^
+                ifTrue:         [negated := true.
+                                spec next]
+                ifFalse:        [negated := false].
+        ^ TokenPatternTokenSet new
+                initializeElements: (TokenSpecParser on: spec) parse
+                negated: negated
 
-    "Created: / 02-05-2014 / 18:56:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-    "Modified: / 02-05-2014 / 21:27:48 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Created: / 09-05-2014 / 15:48:02 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 ! !
 
-!TokenPatternParser methodsFor:'parsing-private'!
+!TokenPatternParser methodsFor:'recursive descent'!
 
-expect: token
-    source peek ~~ token ifTrue:[ 
-        self error:'Expeciting ', token printString, ' got ' , source peek printString
+atom
+    | atom |
+    "Parse an atom with the inherited parser; a plain character atom (RxsCharacter) is replaced by a TokenPatternToken whose type is that character."
+    atom := super atom.
+    (atom isKindOf:Regex::RxsCharacter) ifTrue:[
+        atom := TokenPatternToken new type:atom character.
     ].
+    ^ atom
 
-    "Created: / 02-05-2014 / 19:05:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
+    "Created: / 09-05-2014 / 15:56:01 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+! !
 
-parse
-    ^ self parseRegex
+!TokenPatternParser::TokenSpecParser methodsFor:'parsing'!
+
+parseNamedSet "Read a named element - after $[ and $:, text up to the next unescaped $: - and add a TokenPatternToken to elements. Text before an unescaped $= becomes the type and text after it the value; without $= the whole text is the type and the value stays nil. match: is inherited - presumably it checks lookahead and advances; confirm in RxCharSetParser."
+    | type value done out |
 
-    "Created: / 02-05-2014 / 18:56:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseAtom
-    | peek type value |
-
-    peek := source peek.
-    peek isSymbol ifTrue:[ 
-        type := source next.
-        source peek == #'->' ifTrue:[ 
-            source next.
-            source peek isString ifFalse:[ 
-                self error: 'Expecting token value'.
-            ] ifTrue:[ 
-                value := source next.
+    self
+        match:$[;
+        match:$:.
+    done := false.
+    out := '' writeStream.
+    [ done ] whileFalse:[ "NOTE(review): no end-of-input check is visible in this loop; confirm what happens when the closing colon is missing"
+        lookahead == $\ ifTrue:[ 
+            "/ Escape sequence: the character after the backslash is copied to out as-is
+            lookahead := source next.
+            out nextPut: lookahead.
+        ] ifFalse:[ 
+            lookahead == $: ifTrue:[ 
+                done := true. "an unescaped colon ends the collected text"
+            ] ifFalse:[ 
+                lookahead == $= ifTrue:[ 
+                    type := out contents. "text collected so far is the type"
+                    out reset. "start over; what follows is collected for the value"
+                ] ifFalse:[ 
+                    out nextPut: lookahead.
+                ].
             ].
         ].
-        ^ TokenPatternNode new initializeTyoe: type value: value.
-    ].
-    peek isCharacter ifTrue:[ 
-        type := source next.
-        ^ TokenPatternNode new initializeTyoe: type value: nil.                                
+        lookahead := source next. "advance; this also runs in the iteration that sets done, so lookahead moves past the closing colon"
+    ].
-    peek isArray ifTrue:[ 
-        | savedSource newSource subRegex |
-
-        newSource := source next readStream.
-        savedSource := source.
-        source := newSource.
-        [ 
-            subRegex := self parseRegex.
-        ] ensure:[ 
-            source := savedSource
-        ].
-        ^ subRegex
+    type isNil ifTrue:[ 
+        type := out contents.
+    ] ifFalse:[
+        value := out contents.
     ].
-    self error:'Unknown atom type'.
-
-    "Created: / 02-05-2014 / 21:17:29 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-    "Modified: / 09-05-2014 / 15:16:55 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseBranch
-
-    | piece branch |
-
-    piece := self parsePiece.
-    branch := (source peek isNil or:[ source peek == #'||' ]) 
-        ifTrue:[ nil ]
-        ifFalse:[ self parseBranch ].
-    ^ Regex::RxsBranch new 
-        initializePiece: piece 
-        branch: branch
+    self match:$].
 
-    "Created: / 02-05-2014 / 19:06:45 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-    "Modified: / 02-05-2014 / 21:11:08 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parsePiece
-    | atom peek |
+    elements add:((TokenPatternToken new)
+                type:type;
+                value:value)
 
-    atom := self parseAtom.
-    peek := source peek.
-    peek == $* ifTrue:[ 
-        source next.
-        ^ Regex::RxsPiece new initializeStarAtom: atom.  
-    ].
-    peek == $+ ifTrue:[ 
-        source next.
-        ^ Regex::RxsPiece new initializePlusAtom: atom.  
-    ].
-    peek == $? ifTrue:[ 
-        source next.
-        ^ Regex::RxsPiece new initializeOptionalAtom: atom.  
-    ].
-     ^Regex::RxsPiece new initializeAtom: atom
-
-    "Created: / 02-05-2014 / 21:11:08 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseRegex
-    | regex branch |
-
-    branch := self parseBranch.
-    source atEnd ifTrue:[ 
-        regex := nil.
-    ] ifFalse:[
-        self expect: $|.
-        regex := self parseRegex.
-    ].
-    ^ Regex::RxsRegex new initializeBranch: branch regex: regex
-
-    "Created: / 02-05-2014 / 18:59:00 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+    "Modified: / 09-05-2014 / 16:35:38 / Jan Vrany <jan.vrany@fit.cvut.cz>"
 ! !