diff -r c446d835cbba -r 44b2dcba820e PPParser.st --- a/PPParser.st Tue Mar 04 15:33:10 2014 +0100 +++ b/PPParser.st Tue Mar 04 15:33:11 2014 +0100 @@ -18,6 +18,324 @@ ^ self basicNew initialize ! ! +!PPParser methodsFor:'*petitanalyzer-enumerating'! + +allParsers + "Answer all the parse nodes of the receiver." + + | result | + result := OrderedCollection new. + self allParsersDo: [ :parser | result addLast: parser ]. + ^ result +! + +allParsersDo: aBlock + "Iterate over all the parse nodes of the receiver." + + self allParsersDo: aBlock seen: IdentitySet new +! + +allParsersDo: aBlock seen: aSet + "Iterate over all the parse nodes of the receiver, do not visit and follow the ones contained in aSet." + + (aSet includes: self) + ifTrue: [ ^ self ]. + aSet add: self. + aBlock value: self. + self children + do: [ :each | each allParsersDo: aBlock seen: aSet ] +! ! + +!PPParser methodsFor:'*petitanalyzer-matching'! + +copyInContext: aDictionary + ^ self copyInContext: aDictionary seen: IdentityDictionary new +! + +copyInContext: aDictionary seen: aSeenDictionary + | copy | + aSeenDictionary + at: self + ifPresent: [ :value | ^ value ]. + copy := aSeenDictionary + at: self + put: self copy. + copy children do: [ :each | + copy + replace: each + with: (each copyInContext: aDictionary seen: aSeenDictionary) ]. + ^ copy +! + +match: aParser inContext: aDictionary + ^ self match: aParser inContext: aDictionary seen: IdentitySet new +! + +match: aParser inContext: aDictionary seen: anIdentitySet + "This is the default implementation to match two parsers. This code can properly handle recursion. This is code is supposed to be overridden in subclasses that add new state." + + (self == aParser or: [ anIdentitySet includes: self ]) + ifTrue: [ ^ true ]. + anIdentitySet add: self. + ^ self class = aParser class and: [ self matchList: self children against: aParser children inContext: aDictionary seen: anIdentitySet ] +! + +matchList: matchList against: parserList inContext: aDictionary seen: aSet + ^ self matchList: matchList index: 1 against: parserList index: 1 inContext: aDictionary seen: aSet +! + +matchList: matchList index: matchIndex against: parserList index: parserIndex inContext: aDictionary seen: aSet + | parser currentIndex currentDictionary currentSeen parsers | + matchList size < matchIndex + ifTrue: [ ^ parserList size < parserIndex ]. + parser := matchList at: matchIndex. + parser class = PPListPattern ifTrue: [ + currentIndex := parserIndex - 1. + [ currentDictionary := aDictionary copy. + currentSeen := aSet copy. + parserList size < currentIndex or: [ + parsers := parserList copyFrom: parserIndex to: currentIndex. + (currentDictionary at: parser ifAbsentPut: [ parsers ]) = parsers and: [ + (self + matchList: matchList + index: matchIndex + 1 + against: parserList + index: currentIndex + 1 + inContext: currentDictionary + seen: currentSeen) + ifTrue: [ + currentDictionary keysAndValuesDo: [ :key :value | aDictionary at: key put: value ]. + ^ true ]. + false ] ] ] whileFalse: [ currentIndex := currentIndex + 1 ]. + ^ false ]. + parserList size < parserIndex + ifTrue: [ ^ false ]. + (parser match: (parserList at: parserIndex) inContext: aDictionary seen: aSet) + ifFalse: [ ^ false ]. + ^ self + matchList: matchList + index: matchIndex + 1 + against: parserList + index: parserIndex + 1 + inContext: aDictionary + seen: aSet +! ! + +!PPParser methodsFor:'*petitanalyzer-named'! + +allNamedParsers + "Answer all the named parse nodes of the receiver." + + | result | + result := OrderedCollection new. + self allNamedParsersDo: [ :parser | result addLast: parser ]. + ^ result +! + +allNamedParsersDo: aBlock + "Iterate over all the named parse nodes of the receiver." + + self allParsersDo: [ :each | + each name notNil + ifTrue: [ aBlock value: each ] ] +! + +innerChildren + "Answer the inner children of the receiver." + + | result | + result := OrderedCollection new. + self innerChildrenDo: [ :parser | result addLast: parser ]. + ^ result +! + +innerChildrenDo: aBlock + "Iterate over the inner children of the receiver." + + self innerChildrenDo: aBlock seen: IdentitySet new +! + +innerChildrenDo: aBlock seen: aSet + "Iterate over the inner children of the receiver." + + self children do: [ :each | + (aSet includes: each) + ifTrue: [ ^ self ]. + aSet add: each. + each name isNil ifTrue: [ + aBlock value: each. + each innerChildrenDo: aBlock seen: aSet ] ] +! + +namedChildren + "Answer the named children of the receiver." + + | result | + result := OrderedCollection new. + self namedChildrenDo: [ :parser | result addLast: parser ]. + ^ result +! + +namedChildrenDo: aBlock + "Iterate over the named children of the receiver." + + self namedChildrenDo: aBlock seen: IdentitySet new +! + +namedChildrenDo: aBlock seen: aSet + "Iterate over the named children of the receiver." + + self children do: [ :each | + (aSet includes: each) + ifTrue: [ ^ self ]. + aSet add: each. + each name isNil + ifTrue: [ each namedChildrenDo: aBlock seen: aSet ] + ifFalse: [ aBlock value: each ] ] +! ! + +!PPParser methodsFor:'*petitanalyzer-private'! + +cycleSet: aDictionary + "PRIVATE: Answer the children that could be part of a cycle-set with the receiver, subclasses might restrict the number of children returned. aDictionary is pre-calcualted first-sets." + + ^ self children +! + +cycleSet: aStack firstSets: aDictionary into: aSet + "PRIVATE: Try to find a cycle, where aStack contains the previously visited parsers. The method returns quickly when the receiver is a terminal, terminals cannot be part of a cycle. If aStack already contains the receiver, then we are in a cycle. In this case we don't process the children further and add the nodes to aSet." + + | index | + self isTerminal + ifTrue: [ ^ self ]. + (index := aStack indexOf: self) > 0 + ifTrue: [ ^ aSet addAll: (aStack copyFrom: index to: aStack size) ]. + aStack addLast: self. + (self cycleSet: aDictionary) + do: [ :each | each cycleSet: aStack firstSets: aDictionary into: aSet ]. + aStack removeLast +! + +firstSets: aFirstDictionary into: aSet + "PRIVATE: Try to add additional elements to the first-set aSet of the receiver, use the incomplete aFirstDictionary." + + self children do: [ :parser | aSet addAll: (aFirstDictionary at: parser) ] +! + +followSets: aFollowDictionary firstSets: aFirstDictionary into: aSet + "PRIVATE: Try to add additional elements to the follow-set aSet of the receiver, use the incomplete aFollowDictionary and the complete aFirstDictionary." + + self children do: [ :parser | (aFollowDictionary at: parser) addAll: aSet ] +! ! + +!PPParser methodsFor:'*petitanalyzer-querying'! + +cycleSet + "Answer a set of all nodes that are within one or more cycles of left-recursion. This is generally not a problem if at least one of the nodes is memoized, but it might make the grammar very inefficient and should be avoided if possible." + + | cycles | + cycles := IdentitySet new. + self cycleSet: OrderedCollection new firstSets: self firstSets into: cycles. + ^ cycles +! + +firstSet + "Answer the first-set of the receiver. Note, this implementation is inefficient when called on different receivers of the same grammar, instead use #firstSets to calculate the first-sets at once." + + ^ self firstSets at: self +! + +firstSets + "Answer a dictionary with all the parsers reachable from the receiver as key and their first-set as value. The first-set of a parser is the list of terminal parsers that begin the parser derivable from that parser." + + | firstSets | + firstSets := IdentityDictionary new. + self allParsersDo: [ :each | + firstSets at: each put: (each isTerminal + ifTrue: [ IdentitySet with: each ] + ifFalse: [ IdentitySet new ]). + each isNullable + ifTrue: [ (firstSets at: each) add: PPSentinel instance ] ]. + [ | changed tally | + changed := false. + firstSets keysAndValuesDo: [ :parser :first | + tally := first size. + parser firstSets: firstSets into: first. + changed := changed or: [ tally ~= first size ] ]. + changed ] whileTrue. + ^ firstSets +! + +followSet + "Answer the follow-set of the receiver starting at the receiver. Note, this implementation is inefficient when called on different receivers of the same grammar, instead use #followSets to calculate the follow-sets at once." + + ^ self followSets at: self +! + +followSets + "Answer a dictionary with all the parsers reachable from the receiver as key and their follow-set as value. The follow-set of a parser is the list of terminal parsers that can appear immediately to the right of that parser." + + | current previous continue firstSets followSets | + current := previous := 0. + firstSets := self firstSets. + followSets := IdentityDictionary new. + self allParsersDo: [ :each | followSets at: each put: IdentitySet new ]. + (followSets at: self) add: PPSentinel instance. + [ followSets keysAndValuesDo: [ :parser :follow | + parser + followSets: followSets + firstSets: firstSets + into: follow ]. + current := followSets + inject: 0 + into: [ :result :each | result + each size ]. + continue := previous < current. + previous := current. + continue ] whileTrue. + ^ followSets +! ! + +!PPParser methodsFor:'*petitanalyzer-testing'! + +isNullable + "Answer true if the receiver is a nullable parser, e.g. it can successfully parse nothing." + + ^ false +! + +isTerminal + "Answer true if the receiver is a terminal or leaf parser, that means it does not delegate to any other parser." + + ^ self children isEmpty +! ! + +!PPParser methodsFor:'*petitanalyzer-transforming'! + +replace: aParser with: anotherParser + "Replace the references of the receiver pointing to aParser with anotherParser." +! + +transform: aBlock + "Answer a copy of all parsers reachable from the receiver transformed using aBlock." + + | mapping root | + mapping := IdentityDictionary new. + self allParsersDo: [ :each | + mapping + at: each + put: (aBlock value: each copy) ]. + root := mapping at: self. + [ | changed | + changed := false. + root allParsersDo: [ :each | + each children do: [ :old | + mapping at: old ifPresent: [ :new | + each replace: old with: new. + changed := true ] ] ]. + changed ] whileTrue. + ^ root +! ! + !PPParser methodsFor:'accessing'! children @@ -90,6 +408,8 @@ !PPParser methodsFor:'converting'! asParser + "Answer the receiving parser." + ^ self ! ! @@ -137,30 +457,12 @@ ^ PPEndOfInputParser on: self ! -max: anInteger - "Answer a new parser that parses the receiver at most anInteger times." - - ^ PPRepeatingParser on: self max: anInteger -! - memoized "Answer a new memoized parser, for refraining redundant computations. This ensures polynomial time O(n^4) for left-recursive grammars and O(n^3) for non left-recursive grammars in the worst case. Not necessary for most grammars that are carefully written and in O(n) anyway." ^ PPMemoizedParser on: self ! -min: anInteger - "Answer a new parser that parses the receiver at least anInteger times." - - ^ PPRepeatingParser on: self min: anInteger -! - -min: aMinInteger max: aMaxInteger - "Answer a new parser that parses the receiver at least aMinInteger and at most aMaxInteger times." - - ^ PPRepeatingParser on: self min: aMinInteger max: aMaxInteger -! - negate "Answer a new parser consumes any input token but the receiver." @@ -179,52 +481,6 @@ ^ PPOptionalParser on: self ! -plus - "Answer a new parser that parses the receiver one or more times." - - ^ self min: 1 -! - -plusGreedy: aParser - "Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed." - - ^ self , (self starGreedy: aParser) map: [ :first :rest | rest copyWithFirst: first ] -! - -plusLazy: aParser - "Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed." - - ^ self , (self starLazy: aParser) map: [ :first :rest | rest copyWithFirst: first ] -! - -star - "Answer a new parser that parses the receiver zero or more times. This is a greedy and blind implementation that tries to consume as much input as possible and it does not consider what comes afterwards." - - ^ PPRepeatingParser on: self -! - -starGreedy: aParser - "Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed." - - | parser | - parser := PPChoiceParser new. - parser setParsers: (Array - with: (self , parser map: [ :each :rest | rest addFirst: each; yourself ]) - with: (aParser and ==> [ :each | OrderedCollection new ])). - ^ parser ==> [ :rest | rest asArray ] -! - -starLazy: aParser - "Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed." - - | parser | - parser := PPChoiceParser new. - parser setParsers: (Array - with: (aParser and ==> [ :each | OrderedCollection new ]) - with: (self , parser map: [ :each :rest | rest addFirst: each; yourself ])). - ^ parser ==> [ :rest | rest asArray ] -! - times: anInteger "Answer a new parser that parses the receiver exactly anInteger times." @@ -292,42 +548,6 @@ ^ PPFlattenParser on: self ! -foldLeft: aBlock - "Answer a new parser that that folds the result of the receiver from left-to-right into aBlock. The argument aBlock must take two or more arguments." - - | size args | - size := aBlock numArgs. - args := Array new: size. - ^ self ==> [ :nodes | - args at: 1 put: (nodes at: 1). - 2 to: nodes size by: size - 1 do: [ :index | - args - replaceFrom: 2 to: size with: nodes startingAt: index; - at: 1 put: (aBlock valueWithArguments: args) ]. - args at: 1 ] -! - -foldRight: aBlock - "Answer a new parser that that folds the result of the receiver from right-to-left into aBlock. The argument aBlock must take two or more arguments." - - | size args | - size := aBlock numArgs. - args := Array new: size. - ^ self ==> [ :nodes | - args at: size put: (nodes at: nodes size). - nodes size - size + 1 to: 1 by: 1 - size do: [ :index | - args - replaceFrom: 1 to: size - 1 with: nodes startingAt: index; - at: size put: (aBlock valueWithArguments: args) ]. - args at: size ] -! - -map: aBlock - "Answer a new parser that works on the receiving sequence an passes in each element as a block argument." - - ^ self ==> aBlock -! - token "Answer a new parser that transforms the input to a token." @@ -344,18 +564,168 @@ "Answer a new parser that consumes spaces before and after the receiving parser." ^ self trimSpaces +! ! + +!PPParser methodsFor:'operators-convenience'! + +withoutSeparators + "Filters out the separators from a parse result produced by one of the productions #delimitedBy: or #separatedBy:." + + ^ self ==> [ :items | + | result | + result := Array new: items size + 1 // 2. + 1 to: result size do: [ :index | result at: index put: (items at: 2 * index - 1) ]. + result ] +! ! + +!PPParser methodsFor:'operators-mapping'! + +foldLeft: aBlock + "Answer a new parser that that folds the result of the receiver from left-to-right into aBlock. The argument aBlock must take two or more arguments." + + | size args | + size := aBlock numArgs. + args := Array new: size. + ^ self ==> [ :nodes | + args at: 1 put: nodes first. + 2 to: nodes size by: size - 1 do: [ :index | + args + replaceFrom: 2 to: size with: nodes startingAt: index; + at: 1 put: (aBlock valueWithArguments: args) ]. + args first ] +! + +foldRight: aBlock + "Answer a new parser that that folds the result of the receiver from right-to-left into aBlock. The argument aBlock must take two or more arguments." + + | size args | + size := aBlock numArgs. + args := Array new: size. + ^ self ==> [ :nodes | + args at: size put: nodes last. + nodes size - size + 1 to: 1 by: 1 - size do: [ :index | + args + replaceFrom: 1 to: size - 1 with: nodes startingAt: index; + at: size put: (aBlock valueWithArguments: args) ]. + args at: size ] +! + +map: aBlock + "Answer a new parser that works on the receiving sequence an passes in each element as a block argument." + + ^ aBlock numArgs = 1 + ifTrue: [ self ==> aBlock ] + ifFalse: [ self error: aBlock numArgs asString , ' arguments expected.' ] +! + +trim: aParser + "Answer a new parser that consumes and ignores aParser repeatedly before and after the receiving parser." + + ^ PPTrimmingParser on: self trimmer: aParser ! trimBlanks "Answer a new parser that consumes blanks before and after the receiving parser." - ^ PPTrimmingParser on: self trimmer: #blank asParser + ^ self trim: #blank asParser ! trimSpaces "Answer a new parser that consumes spaces before and after the receiving parser." - ^ PPTrimmingParser on: self trimmer: #space asParser + ^ self trim: #space asParser +! ! + +!PPParser methodsFor:'operators-repeating'! + +max: anInteger + "Answer a new parser that parses the receiver at most anInteger times." + + ^ self star setMax: anInteger +! + +max: anInteger greedy: aParser + "Answer a new parser that parses the receiver at most anInteger times until it reaches aParser. This is a greedy non-blind implementation. aParser is not consumed." + + ^ (self starGreedy: aParser) setMax: anInteger +! + +max: anInteger lazy: aParser + "Answer a new parser that parses the receiver at most anInteger times until it reaches aParser. This is a lazy non-blind implementation. aParser is not consumed." + + ^ (self starLazy: aParser) setMax: anInteger +! + +min: anInteger + "Answer a new parser that parses the receiver at least anInteger times." + + ^ self star setMin: anInteger +! + +min: anInteger greedy: aParser + "Answer a new parser that parses the receiver at least anInteger times until it reaches aParser. This is a greedy non-blind implementation. aParser is not consumed." + + ^ (self starGreedy: aParser) setMin: anInteger +! + +min: anInteger lazy: aParser + "Answer a new parser that parses the receiver at least anInteger times until it reaches aParser. This is a lazy non-blind implementation. aParser is not consumed." + + ^ (self starLazy: aParser) setMin: anInteger +! + +min: aMinInteger max: aMaxInteger + "Answer a new parser that parses the receiver at least aMinInteger and at most aMaxInteger times." + + ^ self star setMin: aMinInteger; setMax: aMaxInteger +! + +min: aMinInteger max: aMaxInteger greedy: aParser + "Answer a new parser that parses the receiver at least aMinInteger and at most aMaxInteger times until it reaches aParser. This is a greedy non-blind implementation. aParser is not consumed." + + ^ (self starGreedy: aParser) setMin: aMinInteger; setMax: aMaxInteger +! + +min: aMinInteger max: aMaxInteger lazy: aParser + "Answer a new parser that parses the receiver at least aMinInteger and at most aMaxInteger times until it reaches aParser. This is a greedy non-blind implementation. aParser is not consumed." + + ^ (self starLazy: aParser) setMin: aMinInteger; setMax: aMaxInteger +! + +plus + "Answer a new parser that parses the receiver one or more times." + + ^ self star setMin: 1 +! + +plusGreedy: aParser + "Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed." + + ^ (self starGreedy: aParser) setMin: 1 +! + +plusLazy: aParser + "Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed." + + ^ (self starLazy: aParser) setMin: 1 +! + +star + "Answer a new parser that parses the receiver zero or more times. This is a greedy and blind implementation that tries to consume as much input as possible and it does not consider what comes afterwards." + + ^ PPPossessiveRepeatingParser on: self +! + +starGreedy: aParser + "Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed." + + ^ PPGreedyRepeatingParser on: self limit: aParser +! + +starLazy: aParser + "Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed." + + ^ PPLazyRepeatingParser on: self limit: aParser ! ! !PPParser methodsFor:'parsing'! @@ -367,7 +737,7 @@ ! matchesIn: anObject - "Search anObject repeatedly for the matches of the receiver." + "Search anObject repeatedly for the matches of the receiver. Answered an OrderedCollection of the matched parse-trees." | result | result := OrderedCollection new. @@ -383,8 +753,25 @@ ((self and ==> aBlock , #any asParser) / #any asParser) star parse: anObject ! +matchesSkipIn: anObject + "Search anObject repeatedly for the matches of the receiver. Answer an OrderedCollection of the matched parse-trees. Skip over matches." + + | result | + result := OrderedCollection new. + self + matchesSkipIn: anObject + do: [ :each | result addLast: each ]. + ^ result +! + +matchesSkipIn: anObject do: aBlock + "Search anObject repeatedly for the matches of the receiver. Evaluate aBlock for each match with the matched parse-tree as the argument. Skip over matches." + + (self ==> aBlock / #any asParser) star parse: anObject +! + matchingRangesIn: anObject - "Search anObject repeatedly for the matches of the receiver. Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)." + "Search anObject repeatedly for the matches of the receiver. Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)." | result | result := OrderedCollection new. @@ -395,16 +782,32 @@ ! matchingRangesIn: anObject do: aBlock - "Search anObject repeatedly for the matches of the receiver. Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)." + "Search anObject repeatedly for the matches of the receiver. Evaluate aBlock with the range of each match (index of first character to: index of last character)." + + self token + matchesIn: anObject + do: [ :token | aBlock value: (token start to: token stop) ] +! + +matchingSkipRangesIn: anObject + "Search anObject repeatedly for the matches of the receiver. Skip over matches. Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)." | result | result := OrderedCollection new. - [ :stream | stream position + 1 ] asParser , self , [ :stream | stream position ] asParser - matchesIn: anObject - do: [ :value | aBlock value: (value first to: value last) ]. + self + matchingSkipRangesIn: anObject + do: [ :value | result addLast: value ]. ^ result ! +matchingSkipRangesIn: anObject do: aBlock + "Search anObject repeatedly for the matches of the receiver. Skip over matches. Evaluate aBlock with the range of each match (index of first character to: index of last character)." + + self token + matchesSkipIn: anObject + do: [ :token | aBlock value: (token start to: token stop) ] +! + parse: anObject "Parse anObject with the receiving parser and answer the parse-result or an instance of PPFailure." @@ -440,18 +843,10 @@ ! printOn: aStream - PPPrinter notNil ifTrue:[ - PPPrinter new - stream: aStream ; - visit: self - ] ifFalse:[ - super printOn: aStream. - aStream nextPut: $(. - self printNameOn: aStream. - aStream nextPut: $) - ]. - - "Modified: / 11-01-2013 / 09:20:09 / Jan Vrany " + super printOn: aStream. + aStream nextPut: $(. + self printNameOn: aStream. + aStream nextPut: $) ! ! !PPParser methodsFor:'testing'! @@ -464,17 +859,17 @@ ^ false ! ! - !PPParser class methodsFor:'documentation'! version - ^ '$Header: /cvs/stx/stx/goodies/petitparser/PPParser.st,v 1.5 2013-01-11 12:31:10 vrany Exp $' + ^ '$Header: /cvs/stx/stx/goodies/petitparser/PPParser.st,v 1.6 2014-03-04 14:33:11 cg Exp $' ! version_CVS - ^ '$Header: /cvs/stx/stx/goodies/petitparser/PPParser.st,v 1.5 2013-01-11 12:31:10 vrany Exp $' + ^ '$Header: /cvs/stx/stx/goodies/petitparser/PPParser.st,v 1.6 2014-03-04 14:33:11 cg Exp $' ! version_SVN - ^ '§Id: PPParser.st 2 2010-12-17 18:44:23Z vranyj1 §' + ^ '$Id: PPParser.st,v 1.6 2014-03-04 14:33:11 cg Exp $' ! ! +