|
1 "{ Package: 'stx:goodies/petitparser/parsers/java' }" |
|
2 |
|
3 "{ NameSpace: Smalltalk }" |
|
4 |
|
5 PPCompositeParser subclass:#PPJavaLexicon |
|
6 instanceVariableNames:'unicodeEscape rawInputCharacter unicodeMarker hexDigit |
|
7 lineTerminator unicodeInputCharacter inputElements sub |
|
8 inputElement whiteSpace comment javaToken keyword literal |
|
9 separator operator identifier traditionalComment endOfLineComment |
|
10 commentTail charactersInLine commentTailStar notStar |
|
11 notStarNotSlash inputCharacter booleanLiteral nullLiteral |
|
12 identifierChars javaLetter javaLetterOrDigit keywords |
|
13 floatingPointLiteral integerLiteral characterLiteral |
|
14 stringLiteral hexIntegerLiteral octalIntegerLiteral |
|
15 decimalIntegerLiteral decimalNumeral integerTypeSuffix hexNumeral |
|
16 octalNumeral nonZeroDigit digits hexDigits octalDigits octalDigit |
|
17 hexadecimalFloatingPointLiteral decimalFloatingPointLiteral |
|
18 exponentPart floatTypeSuffix exponentIndicator signedInteger sign |
|
19 hexSignificand binaryExponent binaryExponentIndicator |
|
20 escapeSequence singleCharacter stringCharacters stringCharacter |
|
21 octalEscape zeroToThree input operators separators trueToken |
|
22 falseToken nullToken' |
|
23 classVariableNames:'' |
|
24 poolDictionaries:'' |
|
25 category:'PetitJava-Core' |
|
26 ! |
|
27 |
|
28 PPJavaLexicon comment:'A parser with a definitions for some basic Java gramar parts |
|
29 |
|
30 Grammar rules follow as closely as possible the specification found in "The Java Language Specification Third Edition" |
|
31 |
|
32 URL = ' |
|
33 ! |
|
34 |
|
35 !PPJavaLexicon class methodsFor:'accessing'! |
|
36 |
|
37 ignoredNames |
|
38 "Answer a collection of instance-variables that should not be automatically initialized with productions, but that are used internal to the composite parser." |
|
39 |
|
40 | newArray | |
|
41 newArray := Array new: ((self namesToIgnore size) + (super ignoredNames size)). |
|
42 newArray |
|
43 replaceFrom: 1 |
|
44 to: self namesToIgnore size |
|
45 with: self namesToIgnore. |
|
46 newArray |
|
47 replaceFrom: (self namesToIgnore size + 1) |
|
48 to: newArray size |
|
49 with: super ignoredNames. |
|
50 ^newArray |
|
51 ! |
|
52 |
|
53 namesToIgnore |
|
54 |
|
55 ^#('keywords' 'operators' 'separators') |
|
56 ! ! |
|
57 |
|
58 !PPJavaLexicon methodsFor:'accessing'! |
|
59 |
|
60 start |
|
61 "Default start production." |
|
62 |
|
63 ^ input end |
|
64 ! ! |
|
65 |
|
66 !PPJavaLexicon methodsFor:'grammar-comments'! |
|
67 |
|
68 charactersInLine |
|
69 |
|
70 ^ inputCharacter plus |
|
71 ! |
|
72 |
|
73 comment |
|
74 "traditional -> /* |
|
75 endOfLine -> //" |
|
76 ^ traditionalComment / endOfLineComment |
|
77 ! |
|
78 |
|
79 commentTail |
|
80 |
|
81 ^ ('*' asParser , commentTailStar ) / |
|
82 (notStar , commentTail) |
|
83 ! |
|
84 |
|
85 commentTailStar |
|
86 |
|
87 ^ ('/' asParser ) / |
|
88 ('*' asParser , commentTailStar ) / |
|
89 (notStarNotSlash , commentTail ) |
|
90 ! |
|
91 |
|
92 endOfLineComment |
|
93 |
|
94 ^ '//' asParser , charactersInLine optional |
|
95 ! |
|
96 |
|
97 notStar |
|
98 |
|
99 ^ ('*' asParser not , inputCharacter)/lineTerminator |
|
100 ! |
|
101 |
|
102 notStarNotSlash |
|
103 |
|
104 ^ lineTerminator / ((PPPredicateObjectParser anyOf: '*/') not , inputCharacter ) |
|
105 ! |
|
106 |
|
107 traditionalComment |
|
108 |
|
109 ^ '/*' asParser , commentTail |
|
110 ! ! |
|
111 |
|
112 !PPJavaLexicon methodsFor:'grammar-identifiers'! |
|
113 |
|
114 identifier |
|
115 |
|
116 ^ self asToken: (((keyword not) , (booleanLiteral not) , (nullLiteral not) , identifierChars )) |
|
117 ! |
|
118 |
|
119 identifierChars |
|
120 |
|
121 ^ javaLetter plus , javaLetterOrDigit star |
|
122 ! |
|
123 |
|
124 javaLetter |
|
125 |
|
126 ^ (#letter asParser) / (PPPredicateObjectParser anyOf: '_$') |
|
127 ! |
|
128 |
|
129 javaLetterOrDigit |
|
130 |
|
131 ^ javaLetter / (#digit asParser) |
|
132 ! ! |
|
133 |
|
134 !PPJavaLexicon methodsFor:'grammar-input'! |
|
135 |
|
136 input |
|
137 |
|
138 ^ (inputElements optional) , (sub optional) |
|
139 ! |
|
140 |
|
141 inputElement |
|
142 |
|
143 ^ whiteSpace / comment / javaToken |
|
144 ! |
|
145 |
|
146 inputElements |
|
147 |
|
148 ^ inputElement plus |
|
149 ! |
|
150 |
|
151 javaToken |
|
152 |
|
153 |
|
154 ^ identifier / keyword / literal / separator / operator |
|
155 ! |
|
156 |
|
157 sub |
|
158 |
|
159 ^ (Character value: 26) asParser |
|
160 ! ! |
|
161 |
|
162 !PPJavaLexicon methodsFor:'grammar-keywords'! |
|
163 |
|
164 keyword |
|
165 |
|
166 | keywordParsers | |
|
167 |
|
168 keywordParsers := keywords keysSortedSafely |
|
169 collect: [:eachKey | keywords at: eachKey ]. |
|
170 ^ self asToken: ( (keywordParsers reduce: [ :a :b | a / b ]) ) |
|
171 ! ! |
|
172 |
|
173 !PPJavaLexicon methodsFor:'grammar-lineTerminators'! |
|
174 |
|
175 inputCharacter |
|
176 |
|
177 ^(lineTerminator not) , unicodeInputCharacter ==> #second |
|
178 ! |
|
179 |
|
180 lineTerminator |
|
181 |
|
182 ^ (Character lf asParser) / (Character cr asParser , (Character lf asParser ) optional ) |
|
183 ! ! |
|
184 |
|
185 !PPJavaLexicon methodsFor:'grammar-literals'! |
|
186 |
|
187 literal |
|
188 "a literal must be a single token. Whitespaces are not allowed inside the literal" |
|
189 |
|
190 ^ nullLiteral / booleanLiteral / floatingPointLiteral / integerLiteral / characterLiteral / stringLiteral |
|
191 ! ! |
|
192 |
|
193 !PPJavaLexicon methodsFor:'grammar-literals-boolean'! |
|
194 |
|
195 booleanLiteral |
|
196 |
|
197 ^ trueToken / falseToken |
|
198 ! |
|
199 |
|
200 falseToken |
|
201 ^ ('false' asParser , #word asParser not) javaToken |
|
202 ! |
|
203 |
|
204 nullToken |
|
205 ^ ('null' asParser , #word asParser not) javaToken |
|
206 ! |
|
207 |
|
208 trueToken |
|
209 ^ ('true' asParser , #word asParser not) javaToken |
|
210 ! ! |
|
211 |
|
212 !PPJavaLexicon methodsFor:'grammar-literals-character'! |
|
213 |
|
214 characterLiteral |
|
215 |
|
216 ^ ($' asParser , ( escapeSequence / singleCharacter ), $' asParser) javaToken |
|
217 ! |
|
218 |
|
219 singleCharacter |
|
220 |
|
221 ^( PPPredicateObjectParser anyOf: '''\') not , inputCharacter ==> #second |
|
222 ! ! |
|
223 |
|
224 !PPJavaLexicon methodsFor:'grammar-literals-escape'! |
|
225 |
|
226 escapeSequence |
|
227 |
|
228 ^ ($\ asParser , (PPPredicateObjectParser anyOf: 'btnfr""''\' ) ) / |
|
229 octalEscape |
|
230 ! |
|
231 |
|
232 octalEscape |
|
233 |
|
234 ^ $\ asParser , ( (zeroToThree , octalDigit , octalDigit) / (octalDigit , octalDigit optional) ) |
|
235 ! |
|
236 |
|
237 zeroToThree |
|
238 |
|
239 ^PPPredicateObjectParser anyOf: '0123' |
|
240 ! ! |
|
241 |
|
242 !PPJavaLexicon methodsFor:'grammar-literals-floating'! |
|
243 |
|
244 binaryExponent |
|
245 |
|
246 ^ binaryExponentIndicator , signedInteger |
|
247 ! |
|
248 |
|
249 binaryExponentIndicator |
|
250 |
|
251 ^ PPPredicateObjectParser anyOf: 'pP' |
|
252 ! |
|
253 |
|
254 decimalFloatingPointLiteral |
|
255 |
|
256 |dot| |
|
257 dot := $. asParser. |
|
258 |
|
259 ^ ( ( (dot , digits) |
|
260 / |
|
261 (digits , dot , digits optional)) , |
|
262 exponentPart optional , floatTypeSuffix optional ) |
|
263 / |
|
264 (digits , |
|
265 ( (exponentPart , floatTypeSuffix optional) |
|
266 / |
|
267 (exponentPart optional , floatTypeSuffix) )) |
|
268 ! |
|
269 |
|
270 exponentIndicator |
|
271 |
|
272 ^ PPPredicateObjectParser anyOf: 'eE' |
|
273 ! |
|
274 |
|
275 exponentPart |
|
276 |
|
277 ^ exponentIndicator , signedInteger |
|
278 ! |
|
279 |
|
280 floatTypeSuffix |
|
281 |
|
282 ^ PPPredicateObjectParser anyOf: 'fFdD' |
|
283 ! |
|
284 |
|
285 floatingPointLiteral |
|
286 |
|
287 ^ (hexadecimalFloatingPointLiteral / decimalFloatingPointLiteral) javaToken |
|
288 ! |
|
289 |
|
290 hexSignificand |
|
291 |dot| |
|
292 dot := $. asParser. |
|
293 |
|
294 ^ (hexNumeral , dot optional) / |
|
295 ($0 asParser , (PPPredicateObjectParser anyOf: 'xX') , hexDigits optional , dot , hexDigits ) |
|
296 ! |
|
297 |
|
298 hexadecimalFloatingPointLiteral |
|
299 |
|
300 ^ hexSignificand , binaryExponent , floatTypeSuffix optional |
|
301 ! |
|
302 |
|
303 sign |
|
304 |
|
305 ^PPPredicateObjectParser anyOf: '-+' |
|
306 ! |
|
307 |
|
308 signedInteger |
|
309 |
|
310 ^ sign optional , digits |
|
311 ! ! |
|
312 |
|
313 !PPJavaLexicon methodsFor:'grammar-literals-integer'! |
|
314 |
|
315 decimalIntegerLiteral |
|
316 |
|
317 ^ decimalNumeral , (integerTypeSuffix optional) |
|
318 ! |
|
319 |
|
320 decimalNumeral |
|
321 |
|
322 ^($0 asParser) / (nonZeroDigit , digits optional) |
|
323 ! |
|
324 |
|
325 digits |
|
326 "digit is already defined, no need to redefine it" |
|
327 ^#digit asParser plus |
|
328 ! |
|
329 |
|
330 hexDigits |
|
331 |
|
332 ^hexDigit plus |
|
333 ! |
|
334 |
|
335 hexIntegerLiteral |
|
336 |
|
337 ^ hexNumeral , (integerTypeSuffix optional) |
|
338 ! |
|
339 |
|
340 hexNumeral |
|
341 |
|
342 ^$0 asParser, (PPPredicateObjectParser anyOf: 'xX' ), hexDigits |
|
343 ! |
|
344 |
|
345 integerLiteral |
|
346 |
|
347 ^ (hexIntegerLiteral / octalIntegerLiteral / decimalIntegerLiteral) javaToken |
|
348 ! |
|
349 |
|
350 integerTypeSuffix |
|
351 |
|
352 ^ PPPredicateObjectParser anyOf: 'lL' |
|
353 ! |
|
354 |
|
355 nonZeroDigit |
|
356 |
|
357 ^PPPredicateObjectParser anyOf: '123456789'. |
|
358 ! |
|
359 |
|
360 octalDigit |
|
361 |
|
362 ^PPPredicateObjectParser anyOf: '01234567' |
|
363 ! |
|
364 |
|
365 octalDigits |
|
366 |
|
367 ^ octalDigit plus |
|
368 ! |
|
369 |
|
370 octalIntegerLiteral |
|
371 |
|
372 ^ octalNumeral , (integerTypeSuffix optional) |
|
373 ! |
|
374 |
|
375 octalNumeral |
|
376 |
|
377 ^($0 asParser) , octalDigits |
|
378 ! ! |
|
379 |
|
380 !PPJavaLexicon methodsFor:'grammar-literals-null'! |
|
381 |
|
382 nullLiteral |
|
383 |
|
384 ^ nullToken |
|
385 ! ! |
|
386 |
|
387 !PPJavaLexicon methodsFor:'grammar-literals-string'! |
|
388 |
|
389 stringCharacter |
|
390 |
|
391 ^ ( ( PPPredicateObjectParser anyOf: '"\') not , inputCharacter ==> #second ) / |
|
392 escapeSequence |
|
393 ! |
|
394 |
|
395 stringCharacters |
|
396 |
|
397 ^ stringCharacter plus |
|
398 ! |
|
399 |
|
400 stringLiteral |
|
401 |
|
402 ^ ($" asParser , stringCharacters optional , $" asParser) javaToken |
|
403 ! ! |
|
404 |
|
405 !PPJavaLexicon methodsFor:'grammar-operators'! |
|
406 |
|
407 operator |
|
408 | operatorParsers | |
|
409 |
|
410 operatorParsers := operators keysSortedSafely |
|
411 collect: [:eachKey | operators at: eachKey ]. |
|
412 |
|
413 ^self asToken: (operatorParsers reduce: [ :a :b | a / b ]) |
|
414 ! ! |
|
415 |
|
416 !PPJavaLexicon methodsFor:'grammar-separators'! |
|
417 |
|
418 separator |
|
419 ^self asToken: (PPPredicateObjectParser anyOf: '(){}[];,.' ) |
|
420 ! ! |
|
421 |
|
422 !PPJavaLexicon methodsFor:'grammar-unicode-escapes'! |
|
423 |
|
424 hexDigit |
|
425 |
|
426 ^#hex asParser |
|
427 ! |
|
428 |
|
429 rawInputCharacter |
|
430 |
|
431 ^#any asParser |
|
432 ! |
|
433 |
|
434 unicodeEscape |
|
435 |
|
436 ^ $\ asParser , unicodeMarker , hexDigit , hexDigit , hexDigit , hexDigit |
|
437 ! |
|
438 |
|
439 unicodeInputCharacter |
|
440 ^ unicodeEscape / rawInputCharacter |
|
441 ! |
|
442 |
|
443 unicodeMarker |
|
444 |
|
445 ^$u asParser plus |
|
446 ! ! |
|
447 |
|
448 !PPJavaLexicon methodsFor:'grammar-whiteSpace'! |
|
449 |
|
450 whiteSpace |
|
451 |
|
452 ^ (Character space asParser ) / |
|
453 (Character tab asParser ) / |
|
454 ((Character value: 12) asParser ) / |
|
455 lineTerminator |
|
456 ! ! |
|
457 |
|
458 !PPJavaLexicon methodsFor:'initialization'! |
|
459 |
|
460 initialize |
|
461 |
|
462 super initialize. |
|
463 |
|
464 self initializeKeywords. |
|
465 self initializeOperators. |
|
466 self initializeSeparators. |
|
467 ! |
|
468 |
|
469 initializeKeywords |
|
470 |
|
471 | values | |
|
472 keywords := Dictionary new. |
|
473 values := #('abstract' 'assert' 'boolean' 'break' 'byte' 'case' 'catch' 'char' 'class' 'const' |
|
474 'continue' 'default' 'do' 'double' 'else' 'enum' 'extends' 'final' 'finally' 'float' |
|
475 'for' 'if' 'goto' 'implements' 'import' 'instanceof' 'int' 'interface' 'long' 'native' |
|
476 'new' 'package' 'private' 'protected' 'public' 'return' 'short' 'static' 'strictfp' 'super' |
|
477 'switch' 'synchronized' 'this' 'throw' 'throws' 'transient' 'try' 'void' 'volatile' 'while'). |
|
478 |
|
479 values do: [:eachKeyword | |
|
480 keywords at: eachKeyword |
|
481 put: (PPUnresolvedParser named: ('keyword', eachKeyword first asUppercase asString , eachKeyword allButFirst)) |
|
482 ]. |
|
483 |
|
484 keywords keysAndValuesDo: [:key :value | |
|
485 (keywords at: key) def: (key asParser , #word asParser not)] |
|
486 ! |
|
487 |
|
488 initializeOperators |
|
489 |
|
490 | values | |
|
491 operators := Dictionary new. |
|
492 values := #( '>>>=' '>>>' '>>=' '>>' '>=' '>' '<<=' '<<' '<=' '<' '++' '+=' '+' '--' '-=' '-' '&&' '&=' '&' |
|
493 '||' '|=' '|' '*=' '*' '%=' '%' '/=' '/' '^=' '^' '!!=' '!!' '==' '=' '~' '?' ':' '@' ). |
|
494 " @ ? perhaps for annotation but not in the doc " |
|
495 values do: [:eachOperator | |
|
496 operators at: eachOperator |
|
497 put: (PPUnresolvedParser named: ('operator', eachOperator asString)) |
|
498 ]. |
|
499 |
|
500 operators keysAndValuesDo: [:key :value | |
|
501 (operators at: key) def: (key asParser)] |
|
502 ! |
|
503 |
|
504 initializeSeparators |
|
505 |
|
506 | values | |
|
507 separators := Dictionary new. |
|
508 values := #( '(' ')' '{' '}' '[' ']' ';' ',' '.' ). |
|
509 |
|
510 values do: [:eachSeparator | |
|
511 separators at: eachSeparator |
|
512 put: (PPUnresolvedParser named: ('separator', eachSeparator asString)) |
|
513 ]. |
|
514 |
|
515 separators keysAndValuesDo: [:key :value | |
|
516 (separators at: key) def: (key asParser)] |
|
517 ! ! |
|
518 |
|
519 !PPJavaLexicon methodsFor:'utility'! |
|
520 |
|
521 asToken: aParser |
|
522 |
|
523 ^aParser javaToken |
|
524 ! |
|
525 |
|
526 emptySquaredParenthesis |
|
527 |
|
528 ^ self asToken: (((self tokenFor: '['), (self tokenFor: ']'))) |
|
529 ! |
|
530 |
|
531 tokenFor: aString |
|
532 |
|
533 ^self asToken: (keywords at: aString |
|
534 ifAbsent: [separators at: aString |
|
535 ifAbsent: [operators at: aString] ]) |
|
536 ! ! |
|
537 |