PPParser.st
changeset 0 739fe9b7253e
child 4 90de244a7fa2
equal deleted inserted replaced
-1:000000000000 0:739fe9b7253e
       
     1 "{ Package: 'squeak:petitparser' }"
       
     2 
       
     3 Object subclass:#PPParser
       
     4 	instanceVariableNames:'properties'
       
     5 	classVariableNames:''
       
     6 	poolDictionaries:''
       
     7 	category:'PetitParser-Parsers'
       
     8 !
       
     9 
       
    10 PPParser comment:'An abstract parser for all parsers in PetitParser. Subclasses implement #parseOn: to perform the actual recursive-descent parsing. All parsers support a variety of methods to perform an actual parse, see the methods in the #parsing protocol. Parsers are combined with a series of operators that can be found in the #operations protocol.
       
    11 Instance Variables:
       
    12 	properties	<Dictionary>	Stores additional state in the parser object.'
       
    13 !
       
    14 
       
    15 
       
    16 !PPParser class methodsFor:'instance creation'!
       
    17 
       
    18 named: aString
       
    19 	^ self new name: aString
       
    20 !
       
    21 
       
    22 new
       
    23 	^ self basicNew initialize
       
    24 ! !
       
    25 
       
    26 !PPParser methodsFor:'accessing'!
       
    27 
       
    28 children
       
    29 	"Answer a set of child parsers that could follow the receiver."
       
    30 
       
    31 	^ #()
       
    32 !
       
    33 
       
    34 name
       
    35 	"Answer the production name of the receiver."
       
    36 	
       
    37 	^ self propertyAt: #name ifAbsent: [ nil ]
       
    38 !
       
    39 
       
    40 name: aString
       
    41 	self propertyAt: #name put: aString
       
    42 ! !
       
    43 
       
    44 !PPParser methodsFor:'accessing-properties'!
       
    45 
       
    46 hasProperty: aKey
       
    47 	"Test if the property aKey is present."
       
    48 	
       
    49 	^ properties notNil and: [ properties includesKey: aKey ]
       
    50 !
       
    51 
       
    52 propertyAt: aKey
       
    53 	"Answer the property value associated with aKey."
       
    54 	
       
    55 	^ self propertyAt: aKey ifAbsent: [ self error: 'Property not found' ]
       
    56 !
       
    57 
       
    58 propertyAt: aKey ifAbsent: aBlock
       
    59 	"Answer the property value associated with aKey or, if aKey isn't found, answer the result of evaluating aBlock."
       
    60 	
       
    61 	^ properties isNil
       
    62 		ifTrue: [ aBlock value ]
       
    63 		ifFalse: [ properties at: aKey ifAbsent: aBlock ]
       
    64 !
       
    65 
       
    66 propertyAt: aKey ifAbsentPut: aBlock
       
    67 	"Answer the property associated with aKey or, if aKey isn't found store the result of evaluating aBlock as new value."
       
    68 	
       
    69 	^ self propertyAt: aKey ifAbsent: [ self propertyAt: aKey put: aBlock value ]
       
    70 !
       
    71 
       
    72 propertyAt: aKey put: anObject
       
    73 	"Set the property at aKey to be anObject. If aKey is not found, create a new entry for aKey and set is value to anObject. Answer anObject."
       
    74 
       
    75 	^ (properties ifNil: [ properties := Dictionary new: 1 ])
       
    76 		at: aKey put: anObject
       
    77 !
       
    78 
       
    79 removeProperty: aKey
       
    80 	"Remove the property with aKey. Answer the property or raise an error if aKey isn't found."
       
    81 	
       
    82 	^ self removeProperty: aKey ifAbsent: [ self error: 'Property not found' ]
       
    83 !
       
    84 
       
    85 removeProperty: aKey ifAbsent: aBlock
       
    86 	"Remove the property with aKey. Answer the value or, if aKey isn't found, answer the result of evaluating aBlock."
       
    87 	
       
    88 	| answer |
       
    89 	properties isNil ifTrue: [ ^ aBlock value ].
       
    90 	answer := properties removeKey: aKey ifAbsent: aBlock.
       
    91 	properties isEmpty ifTrue: [ properties := nil ].
       
    92 	^ answer
       
    93 ! !
       
    94 
       
    95 !PPParser methodsFor:'converting'!
       
    96 
       
    97 asParser
       
    98 	^ self
       
    99 ! !
       
   100 
       
   101 !PPParser methodsFor:'copying'!
       
   102 
       
   103 postCopy
       
   104 	super postCopy.
       
   105 	properties := properties copy
       
   106 ! !
       
   107 
       
   108 !PPParser methodsFor:'initialization'!
       
   109 
       
   110 initialize
       
   111 ! !
       
   112 
       
   113 !PPParser methodsFor:'operations'!
       
   114 
       
   115 , aParser 
       
   116 	"Answer a new parser that parses the receiver followed by aParser."
       
   117 
       
   118 	^ PPSequenceParser with: self with: aParser
       
   119 !
       
   120 
       
   121 / aParser 
       
   122 	"Answer a new parser that parses the receiver, if the receiver fails try with aParser (ordered-choice)."
       
   123 	
       
   124 	^ PPChoiceParser with: self with: aParser
       
   125 !
       
   126 
       
   127 and
       
   128 	"Answer a new parser (logical and-predicate) that succeeds whenever the receiver does, but never consumes input."
       
   129 
       
   130 	^ PPAndParser on: self
       
   131 !
       
   132 
       
   133 def: aParser
       
   134 	"Redefine the receiver as the argument aParser. This method is useful when defining recursive parsers: instantiate a PPUnresolvedParser and later redefine it with another one."
       
   135 
       
   136 	^ self becomeForward: (aParser name: self name)
       
   137 !
       
   138 
       
   139 end
       
   140 	"Answer a new parser that succeeds at the end of the input and return the result of the receiver."
       
   141 
       
   142 	^ PPEndOfInputParser on: self
       
   143 !
       
   144 
       
   145 max: anInteger
       
   146 	"Answer a new parser that parses the receiver at most anInteger times."
       
   147 	
       
   148 	^ PPRepeatingParser on: self max: anInteger
       
   149 !
       
   150 
       
   151 memoized
       
   152 	"Answer a new memoized parser, for refraining redundant computations. This ensures polynomial time O(n^4) for left-recursive grammars and O(n^3) for non left-recursive grammars in the worst case. Not necessary for most grammars that are carefully written and in O(n) anyway."
       
   153 	
       
   154 	^ PPMemoizedParser on: self
       
   155 !
       
   156 
       
   157 min: anInteger
       
   158 	"Answer a new parser that parses the receiver at least anInteger times."
       
   159 	
       
   160 	^ PPRepeatingParser on: self min: anInteger
       
   161 !
       
   162 
       
   163 min: aMinInteger max: aMaxInteger
       
   164 	"Answer a new parser that parses the receiver at least aMinInteger and at most aMaxInteger times."
       
   165 	
       
   166 	^ PPRepeatingParser on: self min: aMinInteger max: aMaxInteger
       
   167 !
       
   168 
       
   169 negate
       
   170 	"Answer a new parser consumes any input token but the receiver."
       
   171 	
       
   172 	^ self not , #any asParser ==> #second
       
   173 !
       
   174 
       
   175 not
       
   176 	"Answer a new parser (logical not-predicate) that succeeds whenever the receiver fails, but never consumes input."
       
   177 
       
   178 	^ PPNotParser on: self
       
   179 !
       
   180 
       
   181 optional
       
   182 	"Answer a new parser that parses the receiver, if possible."
       
   183 
       
   184 	^ PPOptionalParser on: self
       
   185 !
       
   186 
       
   187 plus
       
   188 	"Answer a new parser that parses the receiver one or more times."
       
   189 
       
   190 	^ self min: 1
       
   191 !
       
   192 
       
   193 plusGreedy: aParser
       
   194 	"Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed."
       
   195 	
       
   196 	^ self , (self starGreedy: aParser) map: [ :first :rest | rest copyWithFirst: first ]
       
   197 !
       
   198 
       
   199 plusLazy: aParser
       
   200 	"Answer a new parser that parses the receiver one or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed."
       
   201 	
       
   202 	^ self , (self starLazy: aParser) map: [ :first :rest | rest copyWithFirst: first ]
       
   203 !
       
   204 
       
   205 star
       
   206 	"Answer a new parser that parses the receiver zero or more times. This is a greedy and blind implementation that tries to consume as much input as possible and it does not consider what comes afterwards."
       
   207 
       
   208 	^ PPRepeatingParser on: self
       
   209 !
       
   210 
       
   211 starGreedy: aParser
       
   212 	"Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a greedy non-blind implementation of the star operator. aParser is not consumed."
       
   213 	
       
   214 	| parser |
       
   215 	parser := PPChoiceParser new.
       
   216 	parser setParsers: (Array
       
   217 		with: (self , parser map: [ :each :rest | rest addFirst: each; yourself ])
       
   218 		with: (aParser and ==> [ :each | OrderedCollection new ])).
       
   219 	^ parser ==> [ :rest | rest asArray ]
       
   220 !
       
   221 
       
   222 starLazy: aParser
       
   223 	"Answer a new parser that parses the receiver zero or more times until it reaches aParser. This is a lazy non-blind implementation of the star operator. aParser is not consumed."
       
   224 	
       
   225 	| parser |
       
   226 	parser := PPChoiceParser new.
       
   227 	parser setParsers: (Array
       
   228 		with: (aParser and ==> [ :each | OrderedCollection new ])
       
   229 		with: (self , parser map: [ :each :rest | rest addFirst: each; yourself ])).
       
   230 	^ parser ==> [ :rest | rest asArray ]
       
   231 !
       
   232 
       
   233 times: anInteger
       
   234 	"Answer a new parser that parses the receiver exactly anInteger times."
       
   235 	
       
   236 	^ self min: anInteger max: anInteger
       
   237 !
       
   238 
       
   239 wrapped
       
   240 	"Answer a new parser that is simply wrapped."
       
   241 	
       
   242 	^ PPDelegateParser on: self
       
   243 !
       
   244 
       
   245 | aParser
       
   246 	"Answer a new parser that either parses the receiver or aParser. Fail if both pass or fail (exclusive choice, unordered choice)."
       
   247 
       
   248 	^ (self not , aParser) / (aParser not , self) ==> #second
       
   249 ! !
       
   250 
       
   251 !PPParser methodsFor:'operations-convenience'!
       
   252 
       
   253 delimitedBy: aParser
       
   254 	"Answer a new parser that parses the receiver one or more times, separated and possibly ended by aParser."
       
   255 	
       
   256 	^ (self separatedBy: aParser) , (aParser optional) ==> [ :node |
       
   257 		node second isNil
       
   258 			ifTrue: [ node first ]
       
   259 			ifFalse: [ node first copyWith: node second ] ]
       
   260 !
       
   261 
       
   262 separatedBy: aParser
       
   263 	"Answer a new parser that parses the receiver one or more times, separated by aParser."
       
   264 	
       
   265 	^ (PPSequenceParser with: self with: (PPSequenceParser with: aParser with: self) star) ==> [ :nodes |
       
   266 		| result |
       
   267 		result := Array new: 2 * nodes second size + 1.
       
   268 		result at: 1 put: nodes first.
       
   269 		nodes second 
       
   270 			keysAndValuesDo: [ :index :pair | result replaceFrom: 2 * index to: 2 * index + 1 with: pair startingAt: 1 ].
       
   271 		result ]
       
   272 ! !
       
   273 
       
   274 !PPParser methodsFor:'operations-mapping'!
       
   275 
       
   276 ==> aBlock
       
   277 	"Answer a new parser that performs aBlock as action handler on success."
       
   278 
       
   279 	^ PPActionParser on: self block: aBlock
       
   280 !
       
   281 
       
   282 >=> aBlock
       
   283 	"Answer a new parser that wraps the receiving parser with a two argument block. The first argument is the parsed stream, the second argument a continuation block on the delegate parser."
       
   284 
       
   285 	^ PPWrappingParser on: self block: aBlock
       
   286 !
       
   287 
       
   288 answer: anObject
       
   289 	"Answer a new parser that always returns anObject from a successful parse."
       
   290 
       
   291 	^ self ==> [ :nodes | anObject ]
       
   292 !
       
   293 
       
   294 flatten
       
   295 	"Answer a new parser that flattens the underlying collection."
       
   296 	
       
   297 	^ PPFlattenParser on: self
       
   298 !
       
   299 
       
   300 foldLeft: aBlock
       
   301 	"Answer a new parser that that folds the result of the receiver from left-to-right into aBlock. The argument aBlock must take two or more arguments."
       
   302 	
       
   303 	| size args |
       
   304 	size := aBlock numArgs.
       
   305 	args := Array new: size.
       
   306 	^ self ==> [ :nodes |
       
   307 		args at: 1 put: (nodes at: 1).
       
   308 		2 to: nodes size by: size - 1 do: [ :index |
       
   309 			args
       
   310 				replaceFrom: 2 to: size with: nodes startingAt: index;
       
   311 				at: 1 put: (aBlock valueWithArguments: args) ].
       
   312 		args at: 1 ]
       
   313 !
       
   314 
       
   315 foldRight: aBlock
       
   316 	"Answer a new parser that that folds the result of the receiver from right-to-left into aBlock. The argument aBlock must take two or more arguments."
       
   317 
       
   318 	| size args |
       
   319 	size := aBlock numArgs.
       
   320 	args := Array new: size.
       
   321 	^ self ==> [ :nodes |
       
   322 		args at: size put: (nodes at: nodes size).
       
   323 		nodes size - size + 1 to: 1 by: 1 - size do: [ :index |
       
   324 			args
       
   325 				replaceFrom: 1 to: size - 1 with: nodes startingAt: index;
       
   326 				at: size put: (aBlock valueWithArguments: args) ].
       
   327 		args at: size ]
       
   328 !
       
   329 
       
   330 map: aBlock
       
   331 	"Answer a new parser that works on the receiving sequence an passes in each element as a block argument."
       
   332 	
       
   333 	^ self ==> aBlock
       
   334 !
       
   335 
       
   336 token
       
   337 	"Answer a new parser that transforms the input to a token."
       
   338 	
       
   339 	^ PPTokenParser on: self
       
   340 !
       
   341 
       
   342 token: aTokenClass
       
   343 	"Answer a new parser that transforms the input to a token of class aTokenClass."
       
   344 	
       
   345 	^ self token tokenClass: aTokenClass
       
   346 !
       
   347 
       
   348 trim
       
   349 	"Answer a new parser that consumes spaces before and after the receiving parser."
       
   350 	
       
   351 	^ self trimSpaces
       
   352 !
       
   353 
       
   354 trimBlanks
       
   355 	"Answer a new parser that consumes blanks before and after the receiving parser."
       
   356 	
       
   357 	^ PPTrimmingParser on: self trimmer: #blank asParser
       
   358 !
       
   359 
       
   360 trimSpaces
       
   361 	"Answer a new parser that consumes spaces before and after the receiving parser."
       
   362 	
       
   363 	^ PPTrimmingParser on: self trimmer: #space asParser
       
   364 ! !
       
   365 
       
   366 !PPParser methodsFor:'parsing'!
       
   367 
       
   368 matches: anObject
       
   369 	"Answer if anObject can be parsed by the receiver."
       
   370 	
       
   371 	^ (self parse: anObject) isPetitFailure not
       
   372 !
       
   373 
       
   374 matchesIn: anObject
       
   375 	"Search anObject repeatedly for the matches of the receiver."
       
   376 
       
   377 	| result |
       
   378 	result := OrderedCollection new.
       
   379 	self 
       
   380 		matchesIn: anObject
       
   381 		do: [ :each | result addLast: each ].
       
   382 	^ result
       
   383 !
       
   384 
       
   385 matchesIn: anObject do: aBlock
       
   386 	"Search anObject repeatedly for the matches of the receiver. Evaluate aBlock for each match with the matched parse-tree as the argument. Make sure to always consume exactly one character with each step, to not miss any match."
       
   387 
       
   388 	((self and ==> aBlock , #any asParser) / #any asParser) star parse: anObject
       
   389 !
       
   390 
       
   391 matchingRangesIn: anObject
       
   392 	"Search anObject repeatedly for the matches of the receiver.  Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)."
       
   393 	
       
   394 	| result |
       
   395 	result := OrderedCollection new.
       
   396 	self
       
   397 		matchingRangesIn: anObject
       
   398 		do: [ :value | result addLast: value ].
       
   399 	^ result
       
   400 !
       
   401 
       
   402 matchingRangesIn: anObject do: aBlock
       
   403 	"Search anObject repeatedly for the matches of the receiver.  Answer an OrderedCollection of ranges of each match (index of first character to: index of last character)."
       
   404 	
       
   405 	| result |
       
   406 	result := OrderedCollection new.
       
   407 	[ :stream | stream position + 1 ] asParser , self , [ :stream | stream position ] asParser
       
   408 		matchesIn: anObject
       
   409 		do: [ :value | aBlock value: (value first to: value last) ].
       
   410 	^ result
       
   411 !
       
   412 
       
   413 parse: anObject
       
   414 	"Parse anObject with the receiving parser and answer the parse-result or an instance of PPFailure."
       
   415 	
       
   416 	^ self parseOn: anObject asPetitStream
       
   417 !
       
   418 
       
   419 parse: anObject onError: aBlock
       
   420 	"Parse anObject with the receiving parser and answer the parse-result or answer the result of evaluating aBlock. Depending on the number of arguments of the block it is simply evaluated, evaluated with the failure object, or evaluated with the error message and position."
       
   421 	
       
   422 	| result |
       
   423 	result := self parse: anObject.
       
   424 	result isPetitFailure
       
   425 		ifFalse: [ ^ result ].
       
   426 	aBlock numArgs = 0
       
   427 		ifTrue: [ ^ aBlock value ].
       
   428 	aBlock numArgs = 1
       
   429 		ifTrue: [ ^ aBlock value: result ].
       
   430 	^ aBlock value: result message value: result position
       
   431 !
       
   432 
       
   433 parseOn: aStream
       
   434 	"Parse aStream with the receiving parser and answer the parse-result or an instance of PPFailure. Override this method in subclasses to specify custom parse behavior. Do not call this method from outside, instead use #parse:."
       
   435 	
       
   436 	self subclassResponsibility
       
   437 ! !
       
   438 
       
   439 !PPParser methodsFor:'printing'!
       
   440 
       
   441 printNameOn: aStream
       
   442 	self name isNil
       
   443 		ifTrue: [ aStream print: self hash ]
       
   444 		ifFalse: [ aStream nextPutAll: self name ]
       
   445 !
       
   446 
       
   447 printOn: aStream
       
   448 	super printOn: aStream.
       
   449 	aStream nextPut: $(.
       
   450 	self printNameOn: aStream.
       
   451 	aStream nextPut: $)
       
   452 ! !
       
   453 
       
   454 !PPParser methodsFor:'testing'!
       
   455 
       
   456 isPetitParser
       
   457 	^ true
       
   458 !
       
   459 
       
   460 isUnresolved
       
   461 	^ false
       
   462 ! !
       
   463 
       
   464 !PPParser class methodsFor:'documentation'!
       
   465 
       
   466 version_SVN
       
   467     ^ '$Id: PPParser.st,v 1.1 2011-08-18 18:56:17 cg Exp $'
       
   468 ! !