parsers/java/PPJavaLexicon.st
changeset 435 3bc08fb90133
child 436 e1c44b571db9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/parsers/java/PPJavaLexicon.st	Tue Apr 21 14:57:16 2015 +0100
@@ -0,0 +1,533 @@
+"{ Package: 'stx:goodies/petitparser/parsers/java' }"
+
+"{ NameSpace: Smalltalk }"
+
+"Lexical grammar for Java built as a composite parser. The instance variables hold the grammar productions, except keywords, operators and separators, which are excluded through namesToIgnore and hold dictionaries filled by the initialize methods."
+PPCompositeParser subclass:#PPJavaLexicon
+	instanceVariableNames:'unicodeEscape rawInputCharacter unicodeMarker hexDigit
+		lineTerminator unicodeInputCharacter inputElements sub
+		inputElement whiteSpace comment javaToken keyword literal
+		separator operator identifier traditionalComment endOfLineComment
+		commentTail charactersInLine commentTailStar notStar
+		notStarNotSlash inputCharacter booleanLiteral nullLiteral
+		identifierChars javaLetter javaLetterOrDigit keywords
+		floatingPointLiteral integerLiteral characterLiteral
+		stringLiteral hexIntegerLiteral octalIntegerLiteral
+		decimalIntegerLiteral decimalNumeral integerTypeSuffix hexNumeral
+		octalNumeral nonZeroDigit digits hexDigits octalDigits octalDigit
+		hexadecimalFloatingPointLiteral decimalFloatingPointLiteral
+		exponentPart floatTypeSuffix exponentIndicator signedInteger sign
+		hexSignificand binaryExponent binaryExponentIndicator
+		escapeSequence singleCharacter stringCharacters stringCharacter
+		octalEscape zeroToThree input operators separators trueToken
+		falseToken nullToken'
+	classVariableNames:''
+	poolDictionaries:''
+	category:'PetitJava-Core'
+!
+
+PPJavaLexicon comment:'A parser with definitions for some basic Java grammar parts

Grammar rules follow as closely as possible the specification found in "The Java Language Specification Third Edition"

URL = '
+!
+
+!PPJavaLexicon class methodsFor:'accessing'!
+
+ignoredNames
+	"Answer the names of the instance variables that must not be automatically initialized with productions because they are used internally by the composite parser: the names from namesToIgnore followed by the names inherited from the superclass."
+
+	^ self namesToIgnore , super ignoredNames
+!
+
+namesToIgnore
+	"Answer the names of the instance variables of this class that are not productions; ignoredNames prepends them to the inherited list."
+
+	^#('keywords' 'operators' 'separators')
+! !
+
+!PPJavaLexicon methodsFor:'accessing'!
+
+start
+	"Default start production: the input production with end applied to it. NOTE(review): end presumably requires the complete source to be consumed - confirm against PPParser."
+
+	^ input end
+! !
+
+!PPJavaLexicon methodsFor:'grammar-comments'!
+
+charactersInLine   
+	"The characters of one line: one or more inputCharacter."
+
+	^ inputCharacter plus
+!
+
+comment
+	"Either kind of comment: a traditionalComment (opened by /*) or, failing that, an endOfLineComment (opened by //)."
+	^ traditionalComment / endOfLineComment
+!
+
+commentTail
+	"What follows the opening of a traditional comment: either a star continued by commentTailStar, or one notStar element followed recursively by commentTail."
+
+	| afterStar afterOther |
+	afterStar := '*' asParser , commentTailStar.
+	afterOther := notStar , commentTail.
+	^ afterStar / afterOther
+!
+
+commentTailStar 
+	"Used once a star has been read inside a traditional comment. A slash closes the comment, a further star stays in this production, and a notStarNotSlash element leads back to commentTail."
+
+	| closing anotherStar backToTail |
+	closing := '/' asParser.
+	anotherStar := '*' asParser , commentTailStar.
+	backToTail := notStarNotSlash , commentTail.
+	^ closing / anotherStar / backToTail
+!
+
+endOfLineComment 
+	"Two slashes followed by an optional charactersInLine."
+
+	^ '//' asParser , charactersInLine optional
+!
+
+notStar
+	"Either an inputCharacter at a position where a star does not match, or a lineTerminator."
+
+	^  ('*' asParser not , inputCharacter)/lineTerminator
+!
+
+notStarNotSlash  
+	"Either a lineTerminator, or an inputCharacter at a position where neither a star nor a slash matches."
+
+	^ lineTerminator / ((PPPredicateObjectParser anyOf: '*/') not , inputCharacter )
+!
+
+traditionalComment
+	"The opening /* followed by commentTail, which is responsible for finding the end of the comment."
+
+	^ '/*' asParser , commentTail
+! !
+
+!PPJavaLexicon methodsFor:'grammar-identifiers'!
+
+identifier 
+	"Token for an identifier: identifierChars, accepted only at a position where neither keyword, booleanLiteral nor nullLiteral matches."
+
+	| notReserved |
+	notReserved := keyword not , booleanLiteral not , nullLiteral not.
+	^ self asToken: notReserved , identifierChars
+!
+
+identifierChars
+	"The characters of an identifier: one or more javaLetter followed by any number of javaLetterOrDigit."
+	
+	^ javaLetter plus , javaLetterOrDigit star
+!
+
+javaLetter
+	"A character that may start an identifier: whatever the predefined #letter parser accepts, or an underscore or a dollar sign."
+
+	^ (#letter asParser) / (PPPredicateObjectParser anyOf: '_$')
+!
+
+javaLetterOrDigit
+	"A character that may continue an identifier: a javaLetter or whatever the predefined #digit parser accepts."
+
+	^ javaLetter / (#digit asParser)
+! !
+
+!PPJavaLexicon methodsFor:'grammar-input'!
+
+input
+	"The whole input: optional inputElements followed by an optional sub character."
+
+	^ (inputElements optional) , (sub optional)
+!
+
+inputElement
+	"One element of the input: whiteSpace, comment or javaToken, tried in that order."
+
+	^ whiteSpace / comment / javaToken
+!
+
+inputElements
+	"One or more inputElement."
+
+	^ inputElement plus
+!
+
+javaToken
+	"A single token: identifier, keyword, literal, separator or operator, tried in that order."
+
+
+	^ identifier / keyword / literal / separator / operator
+!
+
+sub
+	"The character with code 26 (the ASCII SUB control character), which input accepts optionally after the input elements."
+
+	^ (Character value: 26) asParser 
+! !
+
+!PPJavaLexicon methodsFor:'grammar-keywords'!
+
+keyword
+	"Token for any keyword: an ordered choice over the parsers stored in the keywords dictionary, taken in keysSortedSafely order."
+
+	| sortedNames alternatives |
+	sortedNames := keywords keysSortedSafely.
+	alternatives := sortedNames collect: [:name | keywords at: name].
+	^ self asToken: (alternatives reduce: [:left :right | left / right])
+! !
+
+!PPJavaLexicon methodsFor:'grammar-lineTerminators'!
+
+inputCharacter 
+	"A unicodeInputCharacter at a position where no lineTerminator matches. Only the second element of the sequence, the character itself, is answered."
+
+	^ (lineTerminator not , unicodeInputCharacter) ==> #second
+!
+
+lineTerminator
+	"End of a line: a line feed, or a carriage return optionally followed by a line feed."
+
+	^ (Character lf asParser) / (Character cr asParser , (Character lf asParser ) optional )
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals'!
+
+literal
+	"A literal must be a single token; whitespace is not allowed inside it. The alternatives are tried in the order null, boolean, floating point, integer, character, string, so floating point literals are attempted before integer literals."
+	
+	^ nullLiteral / booleanLiteral / floatingPointLiteral / integerLiteral / characterLiteral / stringLiteral
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-boolean'!
+
+booleanLiteral 
+	"A boolean literal: trueToken or falseToken."
+
+ ^ trueToken / falseToken
+!
+
+falseToken
+	"Token for the literal false. The literal must not be continued by a further identifier character. The check uses javaLetterOrDigit instead of #word because underscore and dollar are identifier characters in Java but are not covered by #word; with #word an identifier such as false_positive was cut after the literal."
+
+	^ ('false' asParser , javaLetterOrDigit not) javaToken
+!
+
+nullToken
+	"Token for the literal null. The literal must not be continued by a further identifier character. The check uses javaLetterOrDigit instead of #word because underscore and dollar are identifier characters in Java but are not covered by #word; with #word an identifier such as null_value was cut after the literal."
+
+	^ ('null' asParser , javaLetterOrDigit not) javaToken
+!
+
+trueToken
+	"Token for the literal true. The literal must not be continued by a further identifier character. The check uses javaLetterOrDigit instead of #word because underscore and dollar are identifier characters in Java but are not covered by #word; with #word an identifier such as true_count was cut after the literal."
+
+	^ ('true' asParser , javaLetterOrDigit not) javaToken
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-character'!
+
+characterLiteral 
+	"Token for a character literal: a single quote, then an escapeSequence or a singleCharacter, then a closing single quote."
+
+	| opening content closing |
+	opening := $' asParser.
+	content := escapeSequence / singleCharacter.
+	closing := $' asParser.
+	^ (opening , content , closing) javaToken
+!
+
+singleCharacter 	
+	"An inputCharacter at a position where neither a single quote nor a backslash matches. Only the second element of the sequence, the character itself, is answered."
+
+	| quoteOrBackslash |
+	quoteOrBackslash := PPPredicateObjectParser anyOf: '''\'.
+	^ (quoteOrBackslash not , inputCharacter) ==> #second
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-escape'!
+
+escapeSequence 
+	"An escape sequence: a backslash followed by one of the characters b t n f r, double quote, single quote or backslash; failing that, an octalEscape."
+
+	| simpleEscape |
+	simpleEscape := $\ asParser , (PPPredicateObjectParser anyOf: 'btnfr""''\').
+	^ simpleEscape / octalEscape
+!
+
+octalEscape
+	"A backslash followed by octal digits. The three digit form, whose first digit is limited by zeroToThree, is tried before the one or two digit form."
+
+	| threeDigits oneOrTwoDigits |
+	threeDigits := zeroToThree , octalDigit , octalDigit.
+	oneOrTwoDigits := octalDigit , octalDigit optional.
+	^ $\ asParser , (threeDigits / oneOrTwoDigits)
+!
+
+zeroToThree
+	"One of the digits 0, 1, 2 or 3; octalEscape uses it as the leading digit of the three digit form."
+
+	^PPPredicateObjectParser anyOf: '0123'
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-floating'!
+
+binaryExponent
+	"A binaryExponentIndicator followed by a signedInteger."
+
+ ^ binaryExponentIndicator , signedInteger
+!
+
+binaryExponentIndicator
+	"The letter p in either case."
+
+  ^ PPPredicateObjectParser anyOf: 'pP'
+!
+
+decimalFloatingPointLiteral
+	"A decimal floating point literal. Forms containing a decimal point are tried first: a dot followed by digits, or digits followed by a dot and optional digits, each with an optional exponentPart and an optional floatTypeSuffix. Otherwise digits alone must be followed by an exponentPart, a floatTypeSuffix, or both."
+
+	| dot mantissa withPoint suffixPart withoutPoint |
+	dot := $. asParser.
+	mantissa := (dot , digits) / (digits , dot , digits optional).
+	withPoint := mantissa , exponentPart optional , floatTypeSuffix optional.
+	suffixPart := (exponentPart , floatTypeSuffix optional)
+		/ (exponentPart optional , floatTypeSuffix).
+	withoutPoint := digits , suffixPart.
+	^ withPoint / withoutPoint
+!
+
+exponentIndicator
+	"The letter e in either case."
+
+  ^ PPPredicateObjectParser anyOf: 'eE'
+!
+
+exponentPart
+	"An exponentIndicator followed by a signedInteger."
+
+ ^ exponentIndicator , signedInteger
+!
+
+floatTypeSuffix
+	"One of the suffix letters f or d, in either case."
+
+	^ PPPredicateObjectParser anyOf: 'fFdD'
+!
+
+floatingPointLiteral
+	"Token for a floating point literal; the hexadecimal form is tried before the decimal form."
+
+  ^ (hexadecimalFloatingPointLiteral / decimalFloatingPointLiteral) javaToken
+!
+
+hexSignificand 
+	"Significand of a hexadecimal floating point literal: a hexNumeral, a hexNumeral followed by a dot, or the 0x prefix with optional hex digits, a dot and mandatory hex digits.
+	 The alternative with fraction digits has to come first. The choice is ordered and is not retried after it has succeeded, so trying the hexNumeral form first consumed the prefix 0x1. of an input such as 0x1.8p1 and the remaining fraction digit made the following binaryExponent fail."
+
+	| dot hexPrefix withFraction withoutFraction |
+	dot := $. asParser.
+	hexPrefix := $0 asParser , (PPPredicateObjectParser anyOf: 'xX').
+	withFraction := hexPrefix , hexDigits optional , dot , hexDigits.
+	withoutFraction := hexNumeral , dot optional.
+	^ withFraction / withoutFraction
+!
+
+hexadecimalFloatingPointLiteral
+	"A hexSignificand, a mandatory binaryExponent and an optional floatTypeSuffix."
+
+ ^ hexSignificand , binaryExponent , floatTypeSuffix optional
+!
+
+sign
+	"A minus or a plus character."
+
+  ^PPPredicateObjectParser anyOf: '-+'
+!
+
+signedInteger
+	"An optional sign followed by digits."
+
+  ^ sign optional , digits
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-integer'!
+
+decimalIntegerLiteral
+	"A decimalNumeral with an optional integerTypeSuffix."
+
+ ^ decimalNumeral , (integerTypeSuffix optional)
+!
+
+decimalNumeral 
+	"Either a lone 0, or a nonZeroDigit optionally followed by digits."
+
+	^($0 asParser) / (nonZeroDigit , digits optional) 
+!
+
+digits 
+	"One or more digits. The predefined #digit parser is used directly, so no separate digit production is defined in this class."
+	^#digit asParser plus
+!
+
+hexDigits 
+	"One or more hexDigit."
+
+	^hexDigit plus
+!
+
+hexIntegerLiteral 
+	"A hexNumeral with an optional integerTypeSuffix."
+
+  ^ hexNumeral , (integerTypeSuffix optional)
+!
+
+hexNumeral 
+	"The digit 0, the letter x in either case, then hexDigits."
+
+	^$0 asParser, (PPPredicateObjectParser anyOf: 'xX' ), hexDigits
+!
+
+integerLiteral
+	"Token for an integer literal; the hexadecimal, octal and decimal forms are tried in that order."
+
+  ^ (hexIntegerLiteral / octalIntegerLiteral / decimalIntegerLiteral) javaToken
+!
+
+integerTypeSuffix
+	"The suffix letter l in either case."
+
+	^ PPPredicateObjectParser anyOf: 'lL'
+!
+
+nonZeroDigit 
+	"One of the digits 1 to 9."
+
+	^ PPPredicateObjectParser anyOf: '123456789'
+!
+
+octalDigit 
+	"One of the digits 0 to 7."
+
+	^PPPredicateObjectParser anyOf: '01234567'
+!
+
+octalDigits
+	"One or more octalDigit."
+
+	^ octalDigit plus
+!
+
+octalIntegerLiteral 
+	"An octalNumeral with an optional integerTypeSuffix."
+
+ ^ octalNumeral , (integerTypeSuffix optional)
+!
+
+octalNumeral 
+	"The digit 0 followed by octalDigits, that is by at least one octal digit."
+
+	^($0 asParser) , octalDigits
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-null'!
+
+nullLiteral 
+	"The null literal; simply the nullToken production."
+
+ ^ nullToken
+! !
+
+!PPJavaLexicon methodsFor:'grammar-literals-string'!
+
+stringCharacter
+	"One character of a string literal: an inputCharacter at a position where neither a double quote nor a backslash matches (only the character itself is answered), or failing that an escapeSequence."
+
+	| plainCharacter |
+	plainCharacter := ((PPPredicateObjectParser anyOf: '"\') not , inputCharacter) ==> #second.
+	^ plainCharacter / escapeSequence
+!
+
+stringCharacters
+	"One or more stringCharacter."
+
+	^ stringCharacter plus
+!
+
+stringLiteral 
+	"Token for a string literal: a double quote character, optional stringCharacters and a closing double quote character."
+
+	| opening closing |
+	opening := $" asParser.
+	closing := $" asParser.
+	^ (opening , stringCharacters optional , closing) javaToken
+! !
+
+!PPJavaLexicon methodsFor:'grammar-operators'!
+
+operator
+	"Token for any single operator, built as an ordered choice over the parsers stored in the operators dictionary.
+	 An operator has to be tried before every operator that is a prefix of it, for example >>= before >> and >. Ascending key order did the opposite, so the one character operators always won and the longer ones were never reached. The keys are therefore ordered longest first; keys of equal length are kept in ascending order to make the result deterministic."
+
+	| orderedKeys operatorParsers |
+	orderedKeys := (operators keys asSortedCollection: [:a :b |
+		a size > b size or: [a size = b size and: [a <= b]]]) asArray.
+	operatorParsers := orderedKeys collect: [:eachKey | operators at: eachKey].
+	^ self asToken: (operatorParsers reduce: [:a :b | a / b])
+! !
+
+!PPJavaLexicon methodsFor:'grammar-separators'!
+
+separator	
+	"Token for one separator character: a round, curly or square bracket, a semicolon, a comma or a dot. NOTE(review): the characters are listed inline here; the separators dictionary is not consulted."
+	^self asToken: (PPPredicateObjectParser anyOf: '(){}[];,.' )
+! !
+
+!PPJavaLexicon methodsFor:'grammar-unicode-escapes'!
+
+hexDigit 
+	"A single hexadecimal digit, as accepted by the predefined #hex parser."
+
+	^#hex asParser
+!
+
+rawInputCharacter
+	"A single raw character, as accepted by the predefined #any parser."
+
+	^#any asParser
+!
+
+unicodeEscape
+	"A backslash, a unicodeMarker and exactly four hexDigit."
+
+	^ $\ asParser , unicodeMarker , hexDigit , hexDigit , hexDigit , hexDigit
+!
+
+unicodeInputCharacter
+	"A unicodeEscape or, failing that, a rawInputCharacter."
+	 ^ unicodeEscape / rawInputCharacter
+!
+
+unicodeMarker
+	"One or more lowercase u characters."
+
+	^$u asParser plus
+! !
+
+!PPJavaLexicon methodsFor:'grammar-whiteSpace'!
+
+whiteSpace
+	"A single white space element: a space, a tab, the character with code 12 (form feed) or a lineTerminator."
+
+	| space tab formFeed |
+	space := Character space asParser.
+	tab := Character tab asParser.
+	formFeed := (Character value: 12) asParser.
+	^ space / tab / formFeed / lineTerminator
+! !
+
+!PPJavaLexicon methodsFor:'initialization'!
+
+initialize
+	"Run the inherited initialization, then build the keyword, operator and separator dictionaries, in that order."
+
+	super initialize.
+	self
+		initializeKeywords;
+		initializeOperators;
+		initializeSeparators
+!
+
+initializeKeywords
+	"Fill the keywords dictionary. Each keyword string is mapped to a parser named keyword plus the capitalized keyword, which matches the keyword only when it is not continued by a further identifier character.
+	 The continuation check covers underscore and dollar in addition to #word. With #word alone an identifier such as do_it or int$ matched as a keyword followed by a rest, because both characters are identifier characters in Java but are not covered by #word."
+
+	| values identifierPart |
+	keywords := Dictionary new.
+	values := #('abstract' 'assert' 'boolean' 'break' 'byte' 'case'  'catch' 'char' 'class' 'const'
+	   'continue' 'default' 'do' 'double' 'else' 'enum' 'extends' 'final'  'finally' 'float'
+	   'for' 'if' 'goto' 'implements' 'import' 'instanceof' 'int' 'interface' 'long' 'native'
+	   'new' 'package' 'private' 'protected' 'public' 'return' 'short' 'static' 'strictfp' 'super'
+	   'switch' 'synchronized' 'this' 'throw' 'throws' 'transient' 'try' 'void' 'volatile' 'while').
+	
+	values do: [:eachKeyword |
+		keywords at: eachKeyword 
+			put: (PPUnresolvedParser named: ('keyword', eachKeyword first asUppercase asString , eachKeyword allButFirst))		
+		].
+	
+	"Built inline because this method runs from initialize; the javaLetterOrDigit production is presumably not assigned yet at that point (confirm against PPCompositeParser)."
+	identifierPart := #word asParser / (PPPredicateObjectParser anyOf: '_$').
+	keywords keysAndValuesDo:  [:key :value |
+		(keywords at: key) def: (key asParser , identifierPart not)]
+!
+
+initializeOperators
+	"Fill the operators dictionary. Each operator string is first mapped to an unresolved parser named operator plus the operator text, which is then defined as the literal parser for that string."
+
+	| symbols |
+	operators := Dictionary new.
+	"The at sign is possibly there for annotations; it is not listed as an operator in the doc."
+	symbols := #(	'>>>=' '>>>' '>>=' '>>' '>=' '>'	'<<=' '<<' '<=' '<'	'++' '+=' '+'	'--' '-=' '-'	'&&' '&=' '&'
+					'||' '|=' '|'	'*=' '*'	'%=' '%'	'/=' '/'	'^=' '^'	'!!=' '!!'	'==' '='	'~'	'?'	':'	'@' ).
+	symbols do: [:op |
+		operators
+			at: op
+			put: (PPUnresolvedParser named: 'operator' , op asString)].
+	operators keysDo: [:op |
+		(operators at: op) def: op asParser]
+!
+
+initializeSeparators
+	"Fill the separators dictionary. Each separator string is first mapped to an unresolved parser named separator plus the separator text, which is then defined as the literal parser for that string."
+
+	| symbols |
+	separators := Dictionary new.
+	symbols := #( '(' ')' '{' '}' '[' ']' ';' ',' '.' ).
+	symbols do: [:sep |
+		separators
+			at: sep
+			put: (PPUnresolvedParser named: 'separator' , sep asString)].
+	separators keysDo: [:sep |
+		(separators at: sep) def: sep asParser]
+! !
+
+!PPJavaLexicon methodsFor:'utility'!
+
+asToken: aParser
+	"Answer aParser wrapped as a Java token, by sending it javaToken."
+
+	^aParser javaToken
+!
+
+emptySquaredParenthesis
+	"Token for an opening square bracket followed by a closing one; each bracket is looked up with tokenFor:."
+
+	| opening closing |
+	opening := self tokenFor: '['.
+	closing := self tokenFor: ']'.
+	^ self asToken: opening , closing
+!
+
+tokenFor: aString
+	"Answer the token for the parser registered under aString. The keywords dictionary is consulted first, then separators, then operators. The last lookup has no ifAbsent: block, so an unknown string is reported by the plain at: on the operators dictionary."
+
+	^self asToken: (keywords at: aString 
+						ifAbsent: [separators at: aString 
+							ifAbsent: [operators at: aString] ])
+! !
+