--- a/SmallSense__TokenPatternParser.st Fri May 09 15:22:12 2014 +0100
+++ b/SmallSense__TokenPatternParser.st Fri May 09 17:16:42 2014 +0100
@@ -2,13 +2,20 @@
"{ NameSpace: SmallSense }"
-Object subclass:#TokenPatternParser
- instanceVariableNames:'source stream'
+"TokenPatternParser: parser for token patterns, defined as a subclass of
+ Regex::RxParser. It declares no instance variables of its own."
+Regex::RxParser subclass:#TokenPatternParser
+ instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
category:'SmallSense-Utils-Matcher'
!
+"TokenSpecParser: private class of TokenPatternParser, defined as a subclass
+ of Regex::RxCharSetParser. TokenPatternParser>>characterSetFrom: creates one
+ on the set spec stream and uses its parse result as the set elements."
+Regex::RxCharSetParser subclass:#TokenSpecParser
+ instanceVariableNames:''
+ classVariableNames:''
+ poolDictionaries:''
+ privateIn:TokenPatternParser
+!
+
!TokenPatternParser class methodsFor:'documentation'!
documentation
@@ -35,121 +42,80 @@
"Created: / 02-05-2014 / 18:56:14 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
-!TokenPatternParser methodsFor:'parsing'!
+!TokenPatternParser methodsFor:'private'!
+
+characterSetFrom: setSpec
+    "Answer a TokenPatternTokenSet for <setSpec>, the text that goes
+     between the brackets of a set expression (a String; never empty).
+     A leading $^ is consumed and marks the set as negated. The rest
+     of the spec is handed to a TokenSpecParser, whose parse result
+     supplies the elements of the set."
-parse: anArrayOrStream
- source := anArrayOrStream readStream.
- ^ self parse.
+    | specStream isNegated |
+
+    specStream := ReadStream on: setSpec.
+    "/ The flag is simply whether the very first character is the caret
+    isNegated := specStream peek = $^.
+    isNegated ifTrue:[ specStream next ].
+    ^ TokenPatternTokenSet new
+        initializeElements: (TokenSpecParser on: specStream) parse
+        negated: isNegated
- "Created: / 02-05-2014 / 18:56:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
- "Modified: / 02-05-2014 / 21:27:48 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+ "Created: / 09-05-2014 / 15:48:02 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
-!TokenPatternParser methodsFor:'parsing-private'!
+!TokenPatternParser methodsFor:'recursive descent'!
-expect: token
- source peek ~~ token ifTrue:[
- self error:'Expeciting ', token printString, ' got ' , source peek printString
+atom
+    "Answer the atom parsed by the inherited implementation, except that
+     a plain character atom (a kind of Regex::RxsCharacter) is replaced
+     by a TokenPatternToken whose type is that character."
+
+    | parsed |
+
+    parsed := super atom.
+    "/ Anything but a character atom is passed through untouched
+    (parsed isKindOf:Regex::RxsCharacter) ifFalse:[
+        ^ parsed
].
+    ^ TokenPatternToken new type:parsed character
- "Created: / 02-05-2014 / 19:05:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
+ "Created: / 09-05-2014 / 15:56:01 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+! !
-parse
- ^ self parseRegex
+!TokenPatternParser::TokenSpecParser methodsFor:'parsing'!
+
+parseNamedSet
+    "Parse a named element of the form [:type:] or [:type=value:] and add
+     a TokenPatternToken with that type (and value, if given) to elements.
+     A backslash takes the following character literally, so $: and $=
+     can appear inside the type or value text.
+     Running out of input before the closing $: is reported through
+     Regex::RxParser signalSyntaxException: instead of feeding nil into
+     the output stream."
+    | type value done out |
- "Created: / 02-05-2014 / 18:56:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseAtom
- | peek type value |
-
- peek := source peek.
- peek isSymbol ifTrue:[
- type := source next.
- source peek == #'->' ifTrue:[
- source next.
- source peek isString ifFalse:[
- self error: 'Expecting token value'.
- ] ifTrue:[
- value := source next.
+    self
+        match:$[;
+        match:$:.
+    done := false.
+    out := '' writeStream.
+    [ done ] whileFalse:[
+        "/ lookahead is nil once the spec is exhausted
+        lookahead isNil ifTrue:[
+            ^ Regex::RxParser signalSyntaxException:'unterminated named token set'
+        ].
+        lookahead == $\ ifTrue:[
+            "/ Escape sequence
+            lookahead := source next.
+            lookahead isNil ifTrue:[
+                ^ Regex::RxParser signalSyntaxException:'unterminated named token set'
+            ].
+            out nextPut: lookahead.
+        ] ifFalse:[
+            lookahead == $: ifTrue:[
+                done := true.
+            ] ifFalse:[
+                lookahead == $= ifTrue:[
+                    "/ Text collected so far is the type; start collecting the value
+                    type := out contents.
+                    out reset.
+                ] ifFalse:[
+                    out nextPut: lookahead.
+                ].
].
].
- ^ TokenPatternNode new initializeTyoe: type value: value.
- ].
- peek isCharacter ifTrue:[
- type := source next.
- ^ TokenPatternNode new initializeTyoe: type value: nil.
+        lookahead := source next.
].
- peek isArray ifTrue:[
- | savedSource newSource subRegex |
-
- newSource := source next readStream.
- savedSource := source.
- source := newSource.
- [
- subRegex := self parseRegex.
- ] ensure:[
- source := savedSource
- ].
- ^ subRegex
+    "/ Without a $= the collected text is the type, otherwise it is the value
+    type isNil ifTrue:[
+        type := out contents.
+    ] ifFalse:[
+        value := out contents.
].
- self error:'Unknown atom type'.
-
- "Created: / 02-05-2014 / 21:17:29 / Jan Vrany <jan.vrany@fit.cvut.cz>"
- "Modified: / 09-05-2014 / 15:16:55 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseBranch
-
- | piece branch |
-
- piece := self parsePiece.
- branch := (source peek isNil or:[ source peek == #'||' ])
- ifTrue:[ nil ]
- ifFalse:[ self parseBranch ].
- ^ Regex::RxsBranch new
- initializePiece: piece
- branch: branch
+    self match:$].
- "Created: / 02-05-2014 / 19:06:45 / Jan Vrany <jan.vrany@fit.cvut.cz>"
- "Modified: / 02-05-2014 / 21:11:08 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parsePiece
- | atom peek |
+    "/ yourself makes sure the token itself is added, whatever value: answers
+    elements add:(TokenPatternToken new
+                    type:type;
+                    value:value;
+                    yourself)
- atom := self parseAtom.
- peek := source peek.
- peek == $* ifTrue:[
- source next.
- ^ Regex::RxsPiece new initializeStarAtom: atom.
- ].
- peek == $+ ifTrue:[
- source next.
- ^ Regex::RxsPiece new initializePlusAtom: atom.
- ].
- peek == $? ifTrue:[
- source next.
- ^ Regex::RxsPiece new initializeOptionalAtom: atom.
- ].
- ^Regex::RxsPiece new initializeAtom: atom
-
- "Created: / 02-05-2014 / 21:11:08 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-!
-
-parseRegex
- | regex branch |
-
- branch := self parseBranch.
- source atEnd ifTrue:[
- regex := nil.
- ] ifFalse:[
- self expect: $|.
- regex := self parseRegex.
- ].
- ^ Regex::RxsRegex new initializeBranch: branch regex: regex
-
- "Created: / 02-05-2014 / 18:59:00 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+ "Modified: / 09-05-2014 / 16:35:38 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !