435
|
1 |
"{ Package: 'stx:goodies/petitparser/parsers/java' }"
|
|
2 |
|
|
3 |
"{ NameSpace: Smalltalk }"
|
|
4 |
|
|
5 |
PPCompositeParser subclass:#PPJavaLexicon
	instanceVariableNames:'unicodeEscape rawInputCharacter unicodeMarker hexDigit
		lineTerminator unicodeInputCharacter inputElements sub
		inputElement whiteSpace comment javaToken keyword literal
		separator operator identifier traditionalComment endOfLineComment
		commentTail charactersInLine commentTailStar notStar
		notStarNotSlash inputCharacter booleanLiteral nullLiteral
		identifierChars javaLetter javaLetterOrDigit keywords
		floatingPointLiteral integerLiteral characterLiteral
		stringLiteral hexIntegerLiteral octalIntegerLiteral
		decimalIntegerLiteral decimalNumeral integerTypeSuffix hexNumeral
		octalNumeral nonZeroDigit digits hexDigits octalDigits octalDigit
		hexadecimalFloatingPointLiteral decimalFloatingPointLiteral
		exponentPart floatTypeSuffix exponentIndicator signedInteger sign
		hexSignificand binaryExponent binaryExponentIndicator
		escapeSequence singleCharacter stringCharacters stringCharacter
		octalEscape zeroToThree input operators separators trueToken
		falseToken nullToken'
	classVariableNames:''
	poolDictionaries:''
	category:'PetitJava-Core'
|
|
26 |
!
|
|
27 |
|
|
28 |
PPJavaLexicon comment:'A parser with definitions for some basic Java grammar parts.
Grammar rules follow as closely as possible the specification found in "The Java Language Specification Third Edition"
URL = '
|
|
29 |
!
|
|
30 |
|
|
31 |
!PPJavaLexicon class methodsFor:'accessing'!
|
|
32 |
|
|
33 |
ignoredNames
	"Answer a collection of instance-variables that should not be automatically initialized with productions, but that are used internal to the composite parser.
	 The result holds the names answered by #namesToIgnore followed by
	 the names answered by the superclass implementation."

	^ self namesToIgnore , super ignoredNames
|
|
47 |
!
|
|
48 |
|
|
49 |
namesToIgnore
	"Answer the names of the instance variables of this class that are
	 added to the ignored names (see #ignoredNames)."

	| ownNames |
	ownNames := #('keywords' 'operators' 'separators').
	^ ownNames
|
|
52 |
! !
|
|
53 |
|
|
54 |
!PPJavaLexicon methodsFor:'accessing'!
|
|
55 |
|
|
56 |
start
	"Default start production: the input production, required to
	 reach the end of the stream."

	| wholeInput |
	wholeInput := input end.
	^ wholeInput
|
|
60 |
! !
|
|
61 |
|
|
62 |
!PPJavaLexicon methodsFor:'grammar-comments'!
|
|
63 |
|
|
64 |
charactersInLine
	"One or more occurrences of inputCharacter."

	| oneCharacter |
	oneCharacter := inputCharacter.
	^ oneCharacter plus
|
|
67 |
!
|
|
68 |
|
|
69 |
comment
	"Either kind of comment; the traditional form is tried first.
		traditional -> /*
		endOfLine   -> //"

	| eitherComment |
	eitherComment := traditionalComment / endOfLineComment.
	^ eitherComment
|
|
73 |
!
|
|
74 |
|
|
75 |
commentTail
	"Remainder of a traditional comment: a star followed by
	 commentTailStar, or a notStar element followed by more commentTail."

	| afterStar afterOther |
	afterStar := '*' asParser , commentTailStar.
	afterOther := notStar , commentTail.
	^ afterStar / afterOther
|
|
79 |
!
|
|
80 |
|
|
81 |
commentTailStar
	"Remainder of a traditional comment once a star has been read:
	 a slash (closing the comment), another star followed by
	 commentTailStar, or a notStarNotSlash element followed by commentTail."

	| closing anotherStar resumed |
	closing := '/' asParser.
	anotherStar := '*' asParser , commentTailStar.
	resumed := notStarNotSlash , commentTail.
	^ closing / anotherStar / resumed
|
|
86 |
!
|
|
87 |
|
|
88 |
endOfLineComment
	"A double slash optionally followed by charactersInLine."

	| marker |
	marker := '//' asParser.
	^ marker , charactersInLine optional
|
|
91 |
!
|
|
92 |
|
|
93 |
notStar
	"An inputCharacter that is not a star, or else a lineTerminator."

	| anythingButStar |
	anythingButStar := '*' asParser not , inputCharacter.
	^ anythingButStar / lineTerminator
|
|
96 |
!
|
|
97 |
|
|
98 |
notStarNotSlash
	"A lineTerminator, or else an inputCharacter that is neither a
	 star nor a slash."

	| starOrSlash |
	starOrSlash := PPPredicateObjectParser anyOf: '*/'.
	^ lineTerminator / (starOrSlash not , inputCharacter)
|
|
101 |
!
|
|
102 |
|
|
103 |
traditionalComment
	"The opening marker /* followed by commentTail."

	| opening |
	opening := '/*' asParser.
	^ opening , commentTail
|
|
106 |
! !
|
|
107 |
|
|
108 |
!PPJavaLexicon methodsFor:'grammar-identifiers'!
|
|
109 |
|
|
110 |
identifier
	"identifierChars, provided that neither keyword, booleanLiteral
	 nor nullLiteral matches at the same position. The result is
	 passed through #asToken:."

	| reservedGuards |
	reservedGuards := keyword not , booleanLiteral not , nullLiteral not.
	^ self asToken: reservedGuards , identifierChars
|
|
113 |
!
|
|
114 |
|
|
115 |
identifierChars
	"One or more javaLetter followed by any number of javaLetterOrDigit."

	| leading trailing |
	leading := javaLetter plus.
	trailing := javaLetterOrDigit star.
	^ leading , trailing
|
|
118 |
!
|
|
119 |
|
|
120 |
javaLetter
	"A letter (as recognized by the #letter parser), an underscore
	 or a dollar sign."

	| plainLetter underscoreOrDollar |
	plainLetter := #letter asParser.
	underscoreOrDollar := PPPredicateObjectParser anyOf: '_$'.
	^ plainLetter / underscoreOrDollar
|
|
123 |
!
|
|
124 |
|
|
125 |
javaLetterOrDigit
	"A javaLetter or a digit (as recognized by the #digit parser)."

	| anyDigit |
	anyDigit := #digit asParser.
	^ javaLetter / anyDigit
|
|
128 |
! !
|
|
129 |
|
|
130 |
!PPJavaLexicon methodsFor:'grammar-input'!
|
|
131 |
|
|
132 |
input
	"Optional inputElements followed by an optional sub character."

	| elements trailer |
	elements := inputElements optional.
	trailer := sub optional.
	^ elements , trailer
|
|
135 |
!
|
|
136 |
|
|
137 |
inputElement
	"A single element of the input: whiteSpace, a comment or a
	 javaToken, tried in that order."

	| ignorable |
	ignorable := whiteSpace / comment.
	^ ignorable / javaToken
|
|
140 |
!
|
|
141 |
|
|
142 |
inputElements
	"One or more inputElement."

	| oneElement |
	oneElement := inputElement.
	^ oneElement plus
|
|
145 |
!
|
|
146 |
|
|
147 |
javaToken
	"A token of the language. The alternatives are tried in this
	 order: identifier, keyword, literal, separator, operator."

	| words |
	words := identifier / keyword / literal.
	^ words / separator / operator
|
|
151 |
!
|
|
152 |
|
|
153 |
sub
	"The character with value 26 (ASCII SUB)."

	| subCharacter |
	subCharacter := Character value: 26.
	^ subCharacter asParser
|
|
156 |
! !
|
|
157 |
|
|
158 |
!PPJavaLexicon methodsFor:'grammar-keywords'!
|
|
159 |
|
|
160 |
keyword
	"Any keyword: an ordered choice over the parsers stored in the
	 keywords dictionary, taken in sorted key order and passed
	 through #asToken:."

	| alternatives |
	alternatives := keywords keysSortedSafely
		collect: [:name | keywords at: name].
	^ self asToken: (alternatives reduce: [:choice :next | choice / next])
|
|
167 |
! !
|
|
168 |
|
|
169 |
!PPJavaLexicon methodsFor:'grammar-lineTerminators'!
|
|
170 |
|
|
171 |
inputCharacter
	"A unicodeInputCharacter that does not start a lineTerminator.
	 Only the second element of the sequence (the character part)
	 is answered."

	| guarded |
	guarded := lineTerminator not , unicodeInputCharacter.
	^ guarded ==> #second
|
|
174 |
!
|
|
175 |
|
|
176 |
lineTerminator
	"A line feed, or a carriage return optionally followed by a
	 line feed."

	| lineFeed carriageReturn |
	lineFeed := Character lf asParser.
	carriageReturn := Character cr asParser.
	^ lineFeed / (carriageReturn , Character lf asParser optional)
|
|
179 |
! !
|
|
180 |
|
|
181 |
!PPJavaLexicon methodsFor:'grammar-literals'!
|
|
182 |
|
|
183 |
literal
	"A literal must be a single token; whitespace is not allowed
	 inside the literal. Alternatives are tried in this order: null,
	 boolean, floating point, integer, character, string."

	| wordLiterals numericLiterals |
	wordLiterals := nullLiteral / booleanLiteral.
	numericLiterals := wordLiterals / floatingPointLiteral / integerLiteral.
	^ numericLiterals / characterLiteral / stringLiteral
|
|
187 |
! !
|
|
188 |
|
|
189 |
!PPJavaLexicon methodsFor:'grammar-literals-boolean'!
|
|
190 |
|
|
191 |
booleanLiteral
	"Either trueToken or falseToken."

	| eitherValue |
	eitherValue := trueToken / falseToken.
	^ eitherValue
|
|
194 |
!
|
|
195 |
|
|
196 |
falseToken
	"The word false as a token. The negative lookahead uses
	 javaLetterOrDigit, so that an identifier which merely starts
	 with the word (for instance false_positive or false$) is not
	 mistaken for the literal; underscore and dollar are valid
	 identifier parts according to javaLetter."

	^ ('false' asParser , javaLetterOrDigit not) javaToken
|
|
198 |
!
|
|
199 |
|
|
200 |
nullToken
	"The word null as a token. The negative lookahead uses
	 javaLetterOrDigit, so that an identifier which merely starts
	 with the word (for instance null_object or null$) is not
	 mistaken for the literal; underscore and dollar are valid
	 identifier parts according to javaLetter."

	^ ('null' asParser , javaLetterOrDigit not) javaToken
|
|
202 |
!
|
|
203 |
|
|
204 |
trueToken
	"The word true as a token. The negative lookahead uses
	 javaLetterOrDigit, so that an identifier which merely starts
	 with the word (for instance true_value or true$) is not
	 mistaken for the literal; underscore and dollar are valid
	 identifier parts according to javaLetter."

	^ ('true' asParser , javaLetterOrDigit not) javaToken
|
|
206 |
! !
|
|
207 |
|
|
208 |
!PPJavaLexicon methodsFor:'grammar-literals-character'!
|
|
209 |
|
|
210 |
characterLiteral
	"A single quote, then an escapeSequence or a singleCharacter,
	 then a closing single quote; the whole sequence is sent
	 #javaToken."

	| content |
	content := escapeSequence / singleCharacter.
	^ ($' asParser , content , $' asParser) javaToken
|
|
213 |
!
|
|
214 |
|
|
215 |
singleCharacter
	"An inputCharacter that is neither a single quote nor a
	 backslash. Only the second element of the sequence (the
	 character part) is answered."

	| quoteOrBackslash |
	quoteOrBackslash := PPPredicateObjectParser anyOf: '''\'.
	^ (quoteOrBackslash not , inputCharacter) ==> #second
|
|
218 |
! !
|
|
219 |
|
|
220 |
!PPJavaLexicon methodsFor:'grammar-literals-escape'!
|
|
221 |
|
|
222 |
escapeSequence
	"A backslash followed by one of the characters b, t, n, f, r,
	 double quote, single quote or backslash; otherwise an octalEscape."

	| simpleEscape |
	simpleEscape := $\ asParser , (PPPredicateObjectParser anyOf: 'btnfr""''\').
	^ simpleEscape / octalEscape
|
|
226 |
!
|
|
227 |
|
|
228 |
octalEscape
	"A backslash followed by either three octal digits, the first of
	 which is a zeroToThree, or by one or two octal digits. The
	 three digit form is tried first."

	| threeDigits oneOrTwoDigits |
	threeDigits := zeroToThree , octalDigit , octalDigit.
	oneOrTwoDigits := octalDigit , octalDigit optional.
	^ $\ asParser , (threeDigits / oneOrTwoDigits)
|
|
231 |
!
|
|
232 |
|
|
233 |
zeroToThree
	"One of the digits 0, 1, 2 or 3."

	| allowed |
	allowed := '0123'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
236 |
! !
|
|
237 |
|
|
238 |
!PPJavaLexicon methodsFor:'grammar-literals-floating'!
|
|
239 |
|
|
240 |
binaryExponent
	"A binaryExponentIndicator followed by a signedInteger."

	| indicator |
	indicator := binaryExponentIndicator.
	^ indicator , signedInteger
|
|
243 |
!
|
|
244 |
|
|
245 |
binaryExponentIndicator
	"The letter p in either case."

	| allowed |
	allowed := 'pP'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
248 |
!
|
|
249 |
|
|
250 |
decimalFloatingPointLiteral
	"Either a mantissa containing a dot (dot digits, or digits dot
	 with optional digits) followed by an optional exponentPart and
	 an optional floatTypeSuffix, or plain digits that must be
	 followed by an exponentPart and/or a floatTypeSuffix."

	| dot mantissa dotted mandatoryTail |
	dot := $. asParser.
	mantissa := (dot , digits) / (digits , dot , digits optional).
	dotted := mantissa , exponentPart optional , floatTypeSuffix optional.
	mandatoryTail := (exponentPart , floatTypeSuffix optional)
		/ (exponentPart optional , floatTypeSuffix).
	^ dotted / (digits , mandatoryTail)
|
|
264 |
!
|
|
265 |
|
|
266 |
exponentIndicator
	"The letter e in either case."

	| allowed |
	allowed := 'eE'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
269 |
!
|
|
270 |
|
|
271 |
exponentPart
	"An exponentIndicator followed by a signedInteger."

	| indicator |
	indicator := exponentIndicator.
	^ indicator , signedInteger
|
|
274 |
!
|
|
275 |
|
|
276 |
floatTypeSuffix
	"One of the letters f or d, in either case."

	| allowed |
	allowed := 'fFdD'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
279 |
!
|
|
280 |
|
|
281 |
floatingPointLiteral
	"A hexadecimal or a decimal floating point literal (hexadecimal
	 tried first); the choice is sent #javaToken."

	| eitherForm |
	eitherForm := hexadecimalFloatingPointLiteral / decimalFloatingPointLiteral.
	^ eitherForm javaToken
|
|
284 |
!
|
|
285 |
|
|
286 |
hexSignificand
	"The significand of a hexadecimal floating point literal: either
	 0x with optional hex digits, a dot and mandatory hex digits, or
	 a hexNumeral optionally followed by a dot.

	 The form with digits after the dot is tried first. With ordered
	 choice the other order would let the hexNumeral alternative
	 succeed on the prefix of an input such as 0x1.8p1 (consuming
	 0x1 and the dot), after which the enclosing sequence fails on
	 the fraction digits and never retries the longer form."

	| dot |
	dot := $. asParser.
	^ ($0 asParser , (PPPredicateObjectParser anyOf: 'xX') , hexDigits optional , dot , hexDigits)
		/ (hexNumeral , dot optional)
|
|
292 |
!
|
|
293 |
|
|
294 |
hexadecimalFloatingPointLiteral
	"A hexSignificand, a binaryExponent and an optional floatTypeSuffix."

	| significandAndExponent |
	significandAndExponent := hexSignificand , binaryExponent.
	^ significandAndExponent , floatTypeSuffix optional
|
|
297 |
!
|
|
298 |
|
|
299 |
sign
	"A minus or a plus character."

	| allowed |
	allowed := '-+'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
302 |
!
|
|
303 |
|
|
304 |
signedInteger
	"An optional sign followed by digits."

	| optionalSign |
	optionalSign := sign optional.
	^ optionalSign , digits
|
|
307 |
! !
|
|
308 |
|
|
309 |
!PPJavaLexicon methodsFor:'grammar-literals-integer'!
|
|
310 |
|
|
311 |
decimalIntegerLiteral
	"A decimalNumeral with an optional integerTypeSuffix."

	| suffix |
	suffix := integerTypeSuffix optional.
	^ decimalNumeral , suffix
|
|
314 |
!
|
|
315 |
|
|
316 |
decimalNumeral
	"A single zero, or a nonZeroDigit optionally followed by digits."

	| zero nonZero |
	zero := $0 asParser.
	nonZero := nonZeroDigit , digits optional.
	^ zero / nonZero
|
|
319 |
!
|
|
320 |
|
|
321 |
digits
	"One or more digits. The predefined #digit parser is used, so
	 no separate digit production is defined here."

	| oneDigit |
	oneDigit := #digit asParser.
	^ oneDigit plus
|
|
324 |
!
|
|
325 |
|
|
326 |
hexDigits
	"One or more hexDigit."

	| oneDigit |
	oneDigit := hexDigit.
	^ oneDigit plus
|
|
329 |
!
|
|
330 |
|
|
331 |
hexIntegerLiteral
	"A hexNumeral with an optional integerTypeSuffix."

	| suffix |
	suffix := integerTypeSuffix optional.
	^ hexNumeral , suffix
|
|
334 |
!
|
|
335 |
|
|
336 |
hexNumeral
	"A zero, the letter x in either case, and hexDigits."

	| prefix |
	prefix := $0 asParser , (PPPredicateObjectParser anyOf: 'xX').
	^ prefix , hexDigits
|
|
339 |
!
|
|
340 |
|
|
341 |
integerLiteral
	"A hexadecimal, octal or decimal integer literal, tried in that
	 order; the choice is sent #javaToken."

	| anyRadix |
	anyRadix := hexIntegerLiteral / octalIntegerLiteral / decimalIntegerLiteral.
	^ anyRadix javaToken
|
|
344 |
!
|
|
345 |
|
|
346 |
integerTypeSuffix
	"The letter l in either case."

	| allowed |
	allowed := 'lL'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
349 |
!
|
|
350 |
|
|
351 |
nonZeroDigit
	"One of the digits 1 to 9."

	| allowed |
	allowed := '123456789'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
354 |
!
|
|
355 |
|
|
356 |
octalDigit
	"One of the digits 0 to 7."

	| allowed |
	allowed := '01234567'.
	^ PPPredicateObjectParser anyOf: allowed
|
|
359 |
!
|
|
360 |
|
|
361 |
octalDigits
	"One or more octalDigit."

	| oneDigit |
	oneDigit := octalDigit.
	^ oneDigit plus
|
|
364 |
!
|
|
365 |
|
|
366 |
octalIntegerLiteral
	"An octalNumeral with an optional integerTypeSuffix."

	| suffix |
	suffix := integerTypeSuffix optional.
	^ octalNumeral , suffix
|
|
369 |
!
|
|
370 |
|
|
371 |
octalNumeral
	"A zero followed by octalDigits."

	| leadingZero |
	leadingZero := $0 asParser.
	^ leadingZero , octalDigits
|
|
374 |
! !
|
|
375 |
|
|
376 |
!PPJavaLexicon methodsFor:'grammar-literals-null'!
|
|
377 |
|
|
378 |
nullLiteral
	"The null literal; simply the nullToken production."

	| token |
	token := nullToken.
	^ token
|
|
381 |
! !
|
|
382 |
|
|
383 |
!PPJavaLexicon methodsFor:'grammar-literals-string'!
|
|
384 |
|
|
385 |
stringCharacter
	"An inputCharacter that is neither a double quote nor a
	 backslash (answering only the character part), or else an
	 escapeSequence."

	| quoteOrBackslash plainCharacter |
	quoteOrBackslash := PPPredicateObjectParser anyOf: '"\'.
	plainCharacter := (quoteOrBackslash not , inputCharacter) ==> #second.
	^ plainCharacter / escapeSequence
|
|
389 |
!
|
|
390 |
|
|
391 |
stringCharacters
	"One or more stringCharacter."

	| oneCharacter |
	oneCharacter := stringCharacter.
	^ oneCharacter plus
|
|
394 |
!
|
|
395 |
|
|
396 |
stringLiteral
	"A double quote, optional stringCharacters and a closing double
	 quote; the whole sequence is sent #javaToken."

	| content |
	content := stringCharacters optional.
	^ ($" asParser , content , $" asParser) javaToken
|
|
399 |
! !
|
|
400 |
|
|
401 |
!PPJavaLexicon methodsFor:'grammar-operators'!
|
|
402 |
|
|
403 |
operator
	"Any operator: an ordered choice over the parsers stored in the
	 operators dictionary, passed through #asToken:.

	 The alternatives are ordered longest key first (ties broken
	 alphabetically). With a plain ascending sort a short operator
	 such as < would precede << and <<= in the ordered choice and
	 always win, so the longer operators could never be matched."

	| orderedKeys operatorParsers |
	orderedKeys := operators keys asSortedCollection: [:a :b |
		a size > b size or: [a size = b size and: [a <= b]]].
	operatorParsers := orderedKeys asArray
		collect: [:eachKey | operators at: eachKey].
	^ self asToken: (operatorParsers reduce: [:a :b | a / b])
|
|
410 |
! !
|
|
411 |
|
|
412 |
!PPJavaLexicon methodsFor:'grammar-separators'!
|
|
413 |
|
|
414 |
separator
	"One of the separator characters: parentheses, braces, square
	 brackets, semicolon, comma or dot. The result is passed
	 through #asToken:."

	| anySeparator |
	anySeparator := PPPredicateObjectParser anyOf: '(){}[];,.'.
	^ self asToken: anySeparator
|
|
416 |
! !
|
|
417 |
|
|
418 |
!PPJavaLexicon methodsFor:'grammar-unicode-escapes'!
|
|
419 |
|
|
420 |
hexDigit
	"A single hexadecimal digit, as recognized by the #hex parser."

	| hex |
	hex := #hex asParser.
	^ hex
|
|
423 |
!
|
|
424 |
|
|
425 |
rawInputCharacter
	"Any single character, as recognized by the #any parser."

	| anything |
	anything := #any asParser.
	^ anything
|
|
428 |
!
|
|
429 |
|
|
430 |
unicodeEscape
	"A backslash, a unicodeMarker and exactly four hexDigit."

	| prefix |
	prefix := $\ asParser , unicodeMarker.
	^ prefix , hexDigit , hexDigit , hexDigit , hexDigit
|
|
433 |
!
|
|
434 |
|
|
435 |
unicodeInputCharacter
	"A unicodeEscape, or failing that a rawInputCharacter."

	| escapedOrRaw |
	escapedOrRaw := unicodeEscape / rawInputCharacter.
	^ escapedOrRaw
|
|
437 |
!
|
|
438 |
|
|
439 |
unicodeMarker
	"One or more occurrences of the lowercase letter u."

	| letterU |
	letterU := $u asParser.
	^ letterU plus
|
|
442 |
! !
|
|
443 |
|
|
444 |
!PPJavaLexicon methodsFor:'grammar-whiteSpace'!
|
|
445 |
|
|
446 |
whiteSpace
	"A space, a tab, the character with value 12 (form feed) or a
	 lineTerminator."

	| space tab formFeed |
	space := Character space asParser.
	tab := Character tab asParser.
	formFeed := (Character value: 12) asParser.
	^ space / tab / formFeed / lineTerminator
|
|
452 |
! !
|
|
453 |
|
|
454 |
!PPJavaLexicon methodsFor:'initialization'!
|
|
455 |
|
|
456 |
initialize
	"Run the inherited initialization, then build the keywords,
	 operators and separators dictionaries."

	super initialize.
	self
		initializeKeywords;
		initializeOperators;
		initializeSeparators
|
|
463 |
!
|
|
464 |
|
|
465 |
initializeKeywords
	"Fill the keywords dictionary: each keyword string maps to a
	 named parser that accepts the keyword only when it is not
	 directly followed by a further identifier character.

	 The lookahead rejects letters and digits (#word) as well as
	 underscore and dollar, which javaLetter also accepts as
	 identifier characters. With #word alone, an identifier such as
	 int_value would be split into the keyword int and _value.
	 The lookahead is built inline rather than from the
	 javaLetterOrDigit production.
	 NOTE(review): presumably the productions are not yet assigned
	 when this runs from #initialize - confirm against
	 PPCompositeParser."

	| values |
	keywords := Dictionary new.
	values := #('abstract' 'assert' 'boolean' 'break' 'byte' 'case' 'catch' 'char' 'class' 'const'
		'continue' 'default' 'do' 'double' 'else' 'enum' 'extends' 'final' 'finally' 'float'
		'for' 'if' 'goto' 'implements' 'import' 'instanceof' 'int' 'interface' 'long' 'native'
		'new' 'package' 'private' 'protected' 'public' 'return' 'short' 'static' 'strictfp' 'super'
		'switch' 'synchronized' 'this' 'throw' 'throws' 'transient' 'try' 'void' 'volatile' 'while').

	values do: [:eachKeyword |
		keywords at: eachKeyword
			put: (PPUnresolvedParser named: ('keyword', eachKeyword first asUppercase asString , eachKeyword allButFirst))
		].

	keywords keysAndValuesDo: [:key :value |
		(keywords at: key) def: (key asParser ,
			(#word asParser / (PPPredicateObjectParser anyOf: '_$')) not)]
|
|
482 |
!
|
|
483 |
|
|
484 |
initializeOperators
	"Fill the operators dictionary: each operator string maps to a
	 named parser that accepts exactly that string."

	| symbols |
	operators := Dictionary new.
	symbols := #( '>>>=' '>>>' '>>=' '>>' '>=' '>' '<<=' '<<' '<=' '<' '++' '+=' '+' '--' '-=' '-' '&&' '&=' '&'
		'||' '|=' '|' '*=' '*' '%=' '%' '/=' '/' '^=' '^' '!!=' '!!' '==' '=' '~' '?' ':' '@' ).
	" @ ? perhaps for annotation but not in the doc "

	symbols do: [:symbol |
		operators
			at: symbol
			put: (PPUnresolvedParser named: ('operator', symbol asString))].

	operators keysAndValuesDo: [:symbol :placeholder |
		(operators at: symbol) def: symbol asParser]
|
|
498 |
!
|
|
499 |
|
|
500 |
initializeSeparators
	"Fill the separators dictionary: each separator string maps to
	 a named parser that accepts exactly that string."

	| symbols |
	separators := Dictionary new.
	symbols := #( '(' ')' '{' '}' '[' ']' ';' ',' '.' ).

	symbols do: [:symbol |
		separators
			at: symbol
			put: (PPUnresolvedParser named: ('separator', symbol asString))].

	separators keysAndValuesDo: [:symbol :placeholder |
		(separators at: symbol) def: symbol asParser]
|
|
513 |
! !
|
|
514 |
|
|
515 |
!PPJavaLexicon methodsFor:'utility'!
|
|
516 |
|
|
517 |
asToken: aParser
	"Answer the result of sending #javaToken to aParser.
	 NOTE(review): #javaToken is defined outside this class;
	 presumably it wraps the parser in a Java token parser - confirm."

	| token |
	token := aParser javaToken.
	^ token
|
|
520 |
!
|
|
521 |
|
|
522 |
emptySquaredParenthesis
	"The token for an opening square bracket followed by the token
	 for a closing one, passed through #asToken: as a whole."

	| opening closing |
	opening := self tokenFor: '['.
	closing := self tokenFor: ']'.
	^ self asToken: opening , closing
|
|
525 |
!
|
|
526 |
|
|
527 |
tokenFor: aString
	"Answer the token parser registered for aString. The keywords
	 dictionary is consulted first, then separators, then operators;
	 the final lookup has no fallback for a missing key."

	| registered |
	registered := keywords
		at: aString
		ifAbsent: [
			separators
				at: aString
				ifAbsent: [operators at: aString]].
	^ self asToken: registered
|
|
532 |
! !
|
|
533 |
|