parsers/smalltalk/PPSmalltalkGrammar.st
changeset 385 44a36ed4e484
child 386 a409905f7f2d
equal deleted inserted replaced
384:a613ecf5d2a1 385:44a36ed4e484
       
     1 "{ Package: 'stx:goodies/petitparser/parsers/smalltalk' }"
       
     2 
       
     3 PPCompositeParser subclass:#PPSmalltalkGrammar
       
     4 	instanceVariableNames:'array arrayItem arrayLiteral arrayLiteralArray assignment
       
     5 		assignmentToken binary binaryExpression binaryMessage
       
     6 		binaryMethod binaryPragma binaryToken block blockArgument
       
     7 		blockArguments blockArgumentsWith blockArgumentsWithout blockBody
       
     8 		byteLiteral byteLiteralArray cascadeExpression cascadeMessage
       
     9 		char charLiteral charToken expression falseLiteral falseToken
       
    10 		identifier identifierToken keyword keywordExpression
       
    11 		keywordMessage keywordMethod keywordPragma keywordToken literal
       
    12 		message method methodDeclaration methodSequence multiword
       
    13 		nilLiteral nilToken number numberLiteral numberToken parens
       
    14 		period periodToken pragma pragmaMessage pragmas primary return
       
    15 		sequence startExpression startMethod statements string
       
    16 		stringLiteral stringToken symbol symbolLiteral symbolLiteralArray
       
    17 		temporaries trueLiteral trueToken unary unaryExpression
       
    18 		unaryMessage unaryMethod unaryPragma unaryToken variable'
       
    19 	classVariableNames:''
       
    20 	poolDictionaries:''
       
    21 	category:'PetitSmalltalk-Core'
       
    22 !
       
    23 
       
    24 PPSmalltalkGrammar comment:'A parser for Smalltalk methods and expressions.'
       
    25 !
       
    26 
       
    27 !PPSmalltalkGrammar class methodsFor:'accessing'!
       
    28 
       
    29 parseExpression: aString
       
    30 	^ self new parseExpression: aString
       
    31 !
       
    32 
       
    33 parseExpression: aString onError: aBlock
       
    34 	^ self new parseExpression: aString onError: aBlock
       
    35 !
       
    36 
       
    37 parseMethod: aString
       
    38 	^ self new parseMethod: aString
       
    39 !
       
    40 
       
    41 parseMethod: aString onError: aBlock
       
    42 	^ self new parseMethod: aString onError: aBlock
       
    43 ! !
       
    44 
       
    45 !PPSmalltalkGrammar class methodsFor:'testing'!
       
    46 
       
    47 allowUnderscoreAssignment
       
    48 	^ (Scanner respondsTo: #allowUnderscoreAsAssignment) and: [ Scanner allowUnderscoreAsAssignment ]
       
    49 ! !
       
    50 
       
    51 !PPSmalltalkGrammar methodsFor:'accessing'!
       
    52 
       
    53 start
       
    54 	"Default start production."
       
    55 
       
    56 	^ startMethod
       
    57 !
       
    58 
       
    59 startExpression
       
    60 	"Start production for the expression."
       
    61 
       
    62 	^ sequence end
       
    63 !
       
    64 
       
    65 startMethod
       
    66 	"Start production for the method."
       
    67 
       
    68 	^ method end
       
    69 ! !
       
    70 
       
    71 !PPSmalltalkGrammar methodsFor:'grammar'!
       
    72 
       
    73 array
       
    74 	^ ${ asParser smalltalkToken , (expression delimitedBy: periodToken) optional , $} asParser smalltalkToken
       
    75 !
       
    76 
       
    77 assignment
       
    78 	^ variable , assignmentToken
       
    79 !
       
    80 
       
    81 expression
       
    82 	^ assignment star , cascadeExpression
       
    83 !
       
    84 
       
    85 literal
       
    86 	^ numberLiteral / stringLiteral / charLiteral / arrayLiteral / byteLiteral / symbolLiteral / nilLiteral / trueLiteral / falseLiteral
       
    87 !
       
    88 
       
    89 message
       
    90 	^ keywordMessage / binaryMessage / unaryMessage
       
    91 !
       
    92 
       
    93 method
       
    94 	^ methodDeclaration , methodSequence
       
    95 !
       
    96 
       
    97 methodDeclaration
       
    98 	^ keywordMethod / unaryMethod / binaryMethod
       
    99 !
       
   100 
       
   101 methodSequence
       
   102 	^ periodToken star , pragmas , periodToken star , temporaries , periodToken star , pragmas , periodToken star , statements
       
   103 !
       
   104 
       
   105 parens
       
   106 	^ $( asParser smalltalkToken , expression , $) asParser smalltalkToken
       
   107 !
       
   108 
       
   109 pragma
       
   110 	^ $< asParser smalltalkToken , pragmaMessage , $> asParser smalltalkToken
       
   111 !
       
   112 
       
   113 pragmas
       
   114 	^ pragma star
       
   115 !
       
   116 
       
   117 primary
       
   118 	^ literal / variable / block / parens / array
       
   119 !
       
   120 
       
   121 return
       
   122 	^ $^ asParser smalltalkToken , expression
       
   123 !
       
   124 
       
   125 sequence
       
   126 	^ temporaries , periodToken star , statements
       
   127 !
       
   128 
       
   129 statements
       
   130 	^ 	(expression wrapped , ((periodToken plus , statements ==> [ :nodes | nodes first , nodes last ])
       
   131 								/ periodToken star)
       
   132 			==> [ :nodes | (Array with: nodes first) , (nodes last) ])
       
   133 	/	(return , periodToken star
       
   134 			==> [ :nodes | (Array with: nodes first) , (nodes last) ])
       
   135 	/	(periodToken star)
       
   136 !
       
   137 
       
   138 temporaries
       
   139 	^ ($| asParser smalltalkToken , variable star , $| asParser smalltalkToken) optional ==> [ :nodes | nodes ifNil: [ #() ] ]
       
   140 !
       
   141 
       
   142 variable
       
   143 	^ identifierToken
       
   144 ! !
       
   145 
       
   146 !PPSmalltalkGrammar methodsFor:'grammar-blocks'!
       
   147 
       
   148 block
       
   149 	^ $[ asParser smalltalkToken , blockBody , $] asParser smalltalkToken
       
   150 !
       
   151 
       
   152 blockArgument
       
   153 	^ $: asParser smalltalkToken , variable
       
   154 !
       
   155 
       
   156 blockArguments
       
   157 	^ blockArgumentsWith / blockArgumentsWithout
       
   158 !
       
   159 
       
   160 blockArgumentsWith
       
   161 	^ blockArgument plus , ($| asParser smalltalkToken / ($] asParser smalltalkToken and ==> [ :node | nil ]))
       
   162 !
       
   163 
       
   164 blockArgumentsWithout
       
   165 	^ nil asParser ==> [ :nodes | Array with: #() with: nil ]
       
   166 !
       
   167 
       
   168 blockBody
       
   169 	^ blockArguments , sequence
       
   170 ! !
       
   171 
       
   172 !PPSmalltalkGrammar methodsFor:'grammar-literals'!
       
   173 
       
   174 arrayItem
       
   175 	^ literal / symbolLiteralArray / arrayLiteralArray / byteLiteralArray
       
   176 !
       
   177 
       
   178 arrayLiteral
       
   179 	^ '#(' asParser smalltalkToken , arrayItem star , $) asParser smalltalkToken
       
   180 !
       
   181 
       
   182 arrayLiteralArray
       
   183 	^ $( asParser smalltalkToken , arrayItem star , $) asParser smalltalkToken
       
   184 !
       
   185 
       
   186 byteLiteral
       
   187 	^ '#[' asParser smalltalkToken , numberLiteral star , $] asParser smalltalkToken
       
   188 !
       
   189 
       
   190 byteLiteralArray
       
   191 	^ $[ asParser smalltalkToken , numberLiteral star , $] asParser smalltalkToken
       
   192 !
       
   193 
       
   194 charLiteral
       
   195 	^ charToken
       
   196 !
       
   197 
       
   198 falseLiteral
       
   199 	^ falseToken
       
   200 !
       
   201 
       
   202 nilLiteral
       
   203 	^ nilToken
       
   204 !
       
   205 
       
   206 numberLiteral
       
   207 	^ numberToken
       
   208 !
       
   209 
       
   210 stringLiteral
       
   211 	^ stringToken
       
   212 !
       
   213 
       
   214 symbolLiteral
       
   215 	"This is totally fucked up: The Pharo compiler allows multiple #, arbitrary spaces between the # and the symbol, as well as comments inbetween. And yes, it is used."
       
   216 
       
   217 	^ $# asParser smalltalkToken plus , symbol smalltalkToken ==> [ :tokens | tokens first copyWith: tokens last ]
       
   218 !
       
   219 
       
   220 symbolLiteralArray
       
   221 	^ symbol smalltalkToken
       
   222 !
       
   223 
       
   224 trueLiteral
       
   225 	^ trueToken
       
   226 ! !
       
   227 
       
   228 !PPSmalltalkGrammar methodsFor:'grammar-messages'!
       
   229 
       
   230 binaryExpression
       
   231 	^ unaryExpression , binaryMessage star
       
   232 !
       
   233 
       
   234 binaryMessage
       
   235 	^ (binaryToken , unaryExpression) ==> [ :nodes | 
       
   236 		Array 
       
   237 			with: (Array with: nodes first)
       
   238 			with: (Array with: nodes second) ]
       
   239 !
       
   240 
       
   241 cascadeExpression
       
   242 	^ keywordExpression , cascadeMessage star
       
   243 !
       
   244 
       
   245 cascadeMessage
       
   246 	^ $; asParser smalltalkToken , message
       
   247 !
       
   248 
       
   249 keywordExpression
       
   250 	^ binaryExpression , keywordMessage optional
       
   251 !
       
   252 
       
   253 keywordMessage
       
   254 	^ (keywordToken , binaryExpression) plus ==> [ :nodes | 
       
   255 		Array 
       
   256 			with: (nodes collect: [ :each | each first ])
       
   257 			with: (nodes collect: [ :each | each second ]) ]
       
   258 !
       
   259 
       
   260 unaryExpression
       
   261 	^ primary , unaryMessage star
       
   262 !
       
   263 
       
   264 unaryMessage
       
   265 	^ unaryToken ==> [ :node | 
       
   266 		Array 
       
   267 			with: (Array with: node)
       
   268 			with: Array new ]
       
   269 ! !
       
   270 
       
   271 !PPSmalltalkGrammar methodsFor:'grammar-methods'!
       
   272 
       
   273 binaryMethod
       
   274 	^ (binaryToken , variable) ==> [ :nodes | 
       
   275 		Array 
       
   276 			with: (Array with: nodes first)
       
   277 			with: (Array with: nodes second) ]
       
   278 !
       
   279 
       
   280 keywordMethod
       
   281 	^ (keywordToken , variable) plus ==> [ :nodes | 
       
   282 		Array 
       
   283 			with: (nodes collect: [ :each | each first ])
       
   284 			with: (nodes collect: [ :each | each second ]) ]
       
   285 !
       
   286 
       
   287 unaryMethod
       
   288 	^ identifierToken ==> [ :node | 
       
   289 		Array 
       
   290 			with: (Array with: node)
       
   291 			with: Array new ]
       
   292 ! !
       
   293 
       
   294 !PPSmalltalkGrammar methodsFor:'grammar-pragmas'!
       
   295 
       
   296 binaryPragma
       
   297 	^ (binaryToken , arrayItem) ==> [ :nodes | 
       
   298 		Array 
       
   299 			with: (Array with: nodes first)
       
   300 			with: (Array with: nodes second) ]
       
   301 !
       
   302 
       
   303 keywordPragma
       
   304 	^ (keywordToken , arrayItem) plus ==> [ :nodes | 
       
   305 		Array 
       
   306 			with: (nodes collect: [ :each | each first ])
       
   307 			with: (nodes collect: [ :each | each second ]) ]
       
   308 !
       
   309 
       
   310 pragmaMessage
       
   311 	^ keywordPragma / unaryPragma / binaryPragma
       
   312 !
       
   313 
       
   314 unaryPragma
       
   315 	^ identifierToken ==> [ :node | 
       
   316 		Array 
       
   317 			with: (Array with: node)
       
   318 			with: (Array new) ]
       
   319 ! !
       
   320 
       
   321 !PPSmalltalkGrammar methodsFor:'parsing'!
       
   322 
       
   323 parseExpression: aString 
       
   324 	^ self parseExpression: aString onError: [ :msg :pos | self error: msg ]
       
   325 !
       
   326 
       
   327 parseExpression: aString onError: aBlock
       
   328 	^ startExpression parse: aString onError: aBlock
       
   329 !
       
   330 
       
   331 parseMethod: aString 
       
   332 	^ self parseMethod: aString onError: [ :msg :pos | self error: msg ]
       
   333 !
       
   334 
       
   335 parseMethod: aString onError: aBlock
       
   336 	^ startMethod parse: aString onError: aBlock
       
   337 ! !
       
   338 
       
   339 !PPSmalltalkGrammar methodsFor:'primitives'!
       
   340 
       
   341 binary
       
   342 	^ (PPPredicateObjectParser anyOf: '!!%&*+,-/<=>?@\|~') plus
       
   343 !
       
   344 
       
   345 char
       
   346 	^ $$ asParser , #any asParser
       
   347 !
       
   348 
       
   349 identifier
       
   350 	^ self class allowUnderscoreAssignment
       
   351 		ifTrue: [ #letter asParser , #word asParser star ]
       
   352 		ifFalse: [
       
   353 			(PPPredicateObjectParser
       
   354 				on: [ :each | each isLetter or: [ each = $_ ] ]
       
   355 				message: 'letter expected') ,
       
   356 			(PPPredicateObjectParser
       
   357 				on: [ :each | each isAlphaNumeric or: [ each = $_ ] ]
       
   358 				message: 'letter or digit expected') star ]
       
   359 !
       
   360 
       
   361 keyword
       
   362 	^ identifier , $: asParser
       
   363 !
       
   364 
       
   365 multiword
       
   366 	^ keyword plus
       
   367 !
       
   368 
       
   369 number
       
   370         ^ ($- asParser optional , #digit asParser) and , [ :context | 
       
   371                 [ (NumberParser on: context stream) nextNumber ] 
       
   372                         on: Error
       
   373                         do: [ :err | PPFailure message: err messageText at: context position ] ] 
       
   374                                 asParser
       
   375 
       
   376     "Modified: / 07-10-2014 / 09:10:35 / Jan Vrany <jan.vrany@fit.cvut.cz>"
       
   377 !
       
   378 
       
   379 period
       
   380 	^ $. asParser
       
   381 !
       
   382 
       
   383 string
       
   384 	^ $' asParser , ('''''' asParser / $' asParser negate) star , $' asParser
       
   385 !
       
   386 
       
   387 symbol
       
   388 	^ unary / binary / multiword / string
       
   389 !
       
   390 
       
   391 unary
       
   392 	^ identifier , $: asParser not
       
   393 ! !
       
   394 
       
   395 !PPSmalltalkGrammar methodsFor:'token'!
       
   396 
       
   397 assignmentToken
       
   398 	^ self class allowUnderscoreAssignment
       
   399 		ifTrue: [ (':=' asParser / '_' asParser) smalltalkToken ]
       
   400 		ifFalse: [ ':=' asParser smalltalkToken ]
       
   401 !
       
   402 
       
   403 binaryToken
       
   404 	^ binary smalltalkToken
       
   405 !
       
   406 
       
   407 charToken
       
   408 	^ char smalltalkToken
       
   409 !
       
   410 
       
   411 falseToken
       
   412 	^ ('false' asParser , #word asParser not) smalltalkToken
       
   413 !
       
   414 
       
   415 identifierToken
       
   416 	^ identifier smalltalkToken
       
   417 !
       
   418 
       
   419 keywordToken
       
   420 	^ keyword smalltalkToken
       
   421 !
       
   422 
       
   423 nilToken
       
   424 	^ ('nil' asParser , #word asParser not) smalltalkToken
       
   425 !
       
   426 
       
   427 numberToken
       
   428 	^ number smalltalkToken
       
   429 !
       
   430 
       
   431 periodToken
       
   432 	^ period smalltalkToken
       
   433 !
       
   434 
       
   435 stringToken
       
   436 	^ string smalltalkToken
       
   437 !
       
   438 
       
   439 trueToken
       
   440 	^ ('true' asParser , #word asParser not) smalltalkToken
       
   441 !
       
   442 
       
   443 unaryToken
       
   444 	^ unary smalltalkToken
       
   445 ! !
       
   446