A super ugly hack to fix line endings in Java comments.
authorJan Vrany <jan.vrany@fit.cvut.cz>
Tue, 21 Apr 2015 17:20:11 +0100
changeset 437 54b3bc9e3987
parent 436 e1c44b571db9
child 438 20598d7ce9fa
child 439 1a7d51c92b9a
A super ugly hack to fix line endings in Java comments. All three - CR, CR-LF and LF - should be supported.
parsers/java/PPJavaLexicon.st
parsers/java/PPJavaLexiconTest.st
parsers/java/PPJavaParserTest.st
--- a/parsers/java/PPJavaLexicon.st	Tue Apr 21 17:06:24 2015 +0100
+++ b/parsers/java/PPJavaLexicon.st	Tue Apr 21 17:20:11 2015 +0100
@@ -1,544 +1,1 @@
-"{ Package: 'stx:goodies/petitparser/parsers/java' }"
-
-"{ NameSpace: Smalltalk }"
-
-PPCompositeParser subclass:#PPJavaLexicon
-	instanceVariableNames:'unicodeEscape rawInputCharacter unicodeMarker hexDigit
-		lineTerminator unicodeInputCharacter inputElements sub
-		inputElement whiteSpace comment javaToken keyword literal
-		separator operator identifier traditionalComment endOfLineComment
-		commentTail charactersInLine commentTailStar notStar
-		notStarNotSlash inputCharacter booleanLiteral nullLiteral
-		identifierChars javaLetter javaLetterOrDigit keywords
-		floatingPointLiteral integerLiteral characterLiteral
-		stringLiteral hexIntegerLiteral octalIntegerLiteral
-		decimalIntegerLiteral decimalNumeral integerTypeSuffix hexNumeral
-		octalNumeral nonZeroDigit digits hexDigits octalDigits octalDigit
-		hexadecimalFloatingPointLiteral decimalFloatingPointLiteral
-		exponentPart floatTypeSuffix exponentIndicator signedInteger sign
-		hexSignificand binaryExponent binaryExponentIndicator
-		escapeSequence singleCharacter stringCharacters stringCharacter
-		octalEscape zeroToThree input operators separators trueToken
-		falseToken nullToken'
-	classVariableNames:''
-	poolDictionaries:''
-	category:'PetitJava-Core'
-!
-
-PPJavaLexicon comment:'A parser with a definitions for some basic Java gramar parts

Grammar rules follow as closely as possible the specification found in "The Java Language Specification Third Edition"

URL = '
-!
-
-
-!PPJavaLexicon class methodsFor:'accessing'!
-
-ignoredNames
-	"Answer a collection of instance-variables that should not be automatically initialized with productions, but that are used internal to the composite parser."
-
-	| newArray |	
-	newArray := Array new: ((self namesToIgnore size) + (super ignoredNames size)).
-	newArray
-		replaceFrom: 1
-		to: self namesToIgnore size
-		with: self namesToIgnore.
-	newArray
-		replaceFrom: (self namesToIgnore size + 1)
-		to: newArray size
-		with: super ignoredNames.	
-	^newArray
-!
-
-namesToIgnore
-
-	^#('keywords' 'operators' 'separators')
-! !
-
-!PPJavaLexicon methodsFor:'accessing'!
-
-start
-	"Default start production."
-
-	^ input end
-! !
-
-!PPJavaLexicon methodsFor:'grammar-comments'!
-
-charactersInLine   
-
-	^ inputCharacter plus
-!
-
-comment
-	"traditional -> /*
-	 endOfLine -> //"
-	^ traditionalComment / endOfLineComment
-!
-
-commentTail
-
-	^ 	('*' asParser , commentTailStar ) /
-		(notStar , commentTail)
-!
-
-commentTailStar 
-
-	^ ('/' asParser ) /
-	  ('*' asParser , commentTailStar ) /
-	  (notStarNotSlash , commentTail )
-!
-
-endOfLineComment 
-
-	^ '//' asParser , charactersInLine optional
-!
-
-notStar
-
-	^  ('*' asParser not , inputCharacter)/lineTerminator
-!
-
-notStarNotSlash  
-
-	^ lineTerminator / ((PPPredicateObjectParser anyOf: '*/') not , inputCharacter )
-!
-
-traditionalComment
-
-	^ '/*' asParser , commentTail
-! !
-
-!PPJavaLexicon methodsFor:'grammar-identifiers'!
-
-identifier 
-
-	^  self asToken: (((keyword not) , (booleanLiteral not) , (nullLiteral not) , identifierChars ))
-!
-
-identifierChars
-	
-	^ javaLetter plus , javaLetterOrDigit star
-!
-
-javaLetter
-
-	^ (#letter asParser) / (PPPredicateObjectParser anyOf: '_$')
-!
-
-javaLetterOrDigit
-
-	^ javaLetter / (#digit asParser)
-! !
-
-!PPJavaLexicon methodsFor:'grammar-input'!
-
-input
-
-	^ (inputElements optional) , (sub optional)
-!
-
-inputElement
-
-	^ whiteSpace / comment / javaToken
-!
-
-inputElements
-
-	^ inputElement plus
-!
-
-javaToken
-
-
-	^ identifier / keyword / literal / separator / operator
-!
-
-sub
-
-	^ (Character value: 26) asParser 
-! !
-
-!PPJavaLexicon methodsFor:'grammar-keywords'!
-
-keyword
-
-        | keywordParsers |
-        
-        keywordParsers := keywords keys asSortedCollection collect: [:eachKey | keywords at: eachKey ].
-        ^ self asToken: ( (keywordParsers reduce: [ :a :b | a / b ]) )
-
-    "Modified (format): / 21-04-2015 / 15:27:29 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-! !
-
-!PPJavaLexicon methodsFor:'grammar-lineTerminators'!
-
-inputCharacter 
-
-	^(lineTerminator not) , unicodeInputCharacter ==> #second
-!
-
-lineTerminator
-
-        ^ ((Character codePoint: 10)   asParser) / ((Character codePoint: 13) asParser , ((Character codePoint: 10) asParser ) optional )
-
-    "Modified: / 21-04-2015 / 16:55:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals'!
-
-literal
-	"a literal must be a single token. Whitespaces are not allowed inside the literal"
-	
-	^ nullLiteral / booleanLiteral / floatingPointLiteral / integerLiteral / characterLiteral / stringLiteral
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-boolean'!
-
-booleanLiteral 
-
- ^ trueToken / falseToken
-!
-
-falseToken
-	^ ('false' asParser , #word asParser not) javaToken
-!
-
-nullToken
-	^ ('null' asParser , #word asParser not) javaToken
-!
-
-trueToken
-	^ ('true' asParser , #word asParser not) javaToken
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-character'!
-
-characterLiteral 
-
- ^ ($' asParser , ( escapeSequence / singleCharacter ), $' asParser) javaToken
-!
-
-singleCharacter 	
-
-	^( PPPredicateObjectParser anyOf: '''\') not , inputCharacter ==> #second
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-escape'!
-
-escapeSequence 
-
-	^ ($\ asParser , (PPPredicateObjectParser anyOf: 'btnfr""''\' ) ) /
-	   octalEscape 
-!
-
-octalEscape
-
-	^ $\ asParser , ( (zeroToThree , octalDigit , octalDigit) / (octalDigit , octalDigit optional) )
-!
-
-zeroToThree
-
-	^PPPredicateObjectParser anyOf: '0123'
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-floating'!
-
-binaryExponent
-
- ^ binaryExponentIndicator , signedInteger
-!
-
-binaryExponentIndicator
-
-  ^ PPPredicateObjectParser anyOf: 'pP'
-!
-
-decimalFloatingPointLiteral
-
-	|dot|
-	dot := $. asParser.
-
- ^ ( ( (dot , digits) 
-        / 
-        (digits , dot , digits optional)) , 
-			exponentPart optional , floatTypeSuffix optional ) 
-  	/ 
-  	(digits , 
-		( (exponentPart , floatTypeSuffix optional) 
-		  /
-		  (exponentPart optional , floatTypeSuffix) ))
-!
-
-exponentIndicator
-
-  ^ PPPredicateObjectParser anyOf: 'eE'
-!
-
-exponentPart
-
- ^ exponentIndicator , signedInteger
-!
-
-floatTypeSuffix
-
-	^ PPPredicateObjectParser anyOf: 'fFdD'
-!
-
-floatingPointLiteral
-
-  ^ (hexadecimalFloatingPointLiteral / decimalFloatingPointLiteral) javaToken
-!
-
-hexSignificand 
-	|dot|
-	dot := $. asParser.
-
- ^  (hexNumeral , dot optional) /
-    ($0 asParser , (PPPredicateObjectParser anyOf: 'xX') , hexDigits optional , dot , hexDigits )
-!
-
-hexadecimalFloatingPointLiteral
-
- ^ hexSignificand , binaryExponent , floatTypeSuffix optional
-!
-
-sign
-
-  ^PPPredicateObjectParser anyOf: '-+'
-!
-
-signedInteger
-
-  ^ sign optional , digits
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-integer'!
-
-decimalIntegerLiteral
-
- ^ decimalNumeral , (integerTypeSuffix optional)
-!
-
-decimalNumeral 
-
-	^($0 asParser) / (nonZeroDigit , digits optional) 
-!
-
-digits 
-	"digit is already defined, no need to redefine it"
-	^#digit asParser plus
-!
-
-hexDigits 
-
-	^hexDigit plus
-!
-
-hexIntegerLiteral 
-
-  ^ hexNumeral , (integerTypeSuffix optional)
-!
-
-hexNumeral 
-
-	^$0 asParser, (PPPredicateObjectParser anyOf: 'xX' ), hexDigits
-!
-
-integerLiteral
-
-  ^ (hexIntegerLiteral / octalIntegerLiteral / decimalIntegerLiteral) javaToken
-!
-
-integerTypeSuffix
-
-	^ PPPredicateObjectParser anyOf: 'lL'
-!
-
-nonZeroDigit 
-
-	^PPPredicateObjectParser anyOf: '123456789'.
-!
-
-octalDigit 
-
-	^PPPredicateObjectParser anyOf: '01234567'
-!
-
-octalDigits
-
-	^ octalDigit plus
-!
-
-octalIntegerLiteral 
-
- ^ octalNumeral , (integerTypeSuffix optional)
-!
-
-octalNumeral 
-
-	^($0 asParser) , octalDigits
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-null'!
-
-nullLiteral 
-
- ^ nullToken
-! !
-
-!PPJavaLexicon methodsFor:'grammar-literals-string'!
-
-stringCharacter
-		
-	^ ( ( PPPredicateObjectParser anyOf: '"\') not , inputCharacter ==> #second ) /
-	   escapeSequence 
-!
-
-stringCharacters
-
-	^ stringCharacter plus
-!
-
-stringLiteral 
-
- ^ ($" asParser , stringCharacters optional , $" asParser) javaToken
-! !
-
-!PPJavaLexicon methodsFor:'grammar-operators'!
-
-operator
-        | operatorParsers |
-        
-        operatorParsers := operators keys asSortedCollection collect: [:eachKey | operators at: eachKey ].                                                
-        ^self asToken:  (operatorParsers reduce: [ :a :b | a / b ])
-
-    "Modified: / 21-04-2015 / 15:26:21 / Jan Vrany <jan.vrany@fit.cvut.cz>"
-! !
-
-!PPJavaLexicon methodsFor:'grammar-separators'!
-
-separator	
-	^self asToken: (PPPredicateObjectParser anyOf: '(){}[];,.' )
-! !
-
-!PPJavaLexicon methodsFor:'grammar-unicode-escapes'!
-
-hexDigit 
-
-	^#hex asParser
-!
-
-rawInputCharacter
-
-	^#any asParser
-!
-
-unicodeEscape
-
-	^ $\ asParser , unicodeMarker , hexDigit , hexDigit , hexDigit , hexDigit
-!
-
-unicodeInputCharacter
-	 ^ unicodeEscape / rawInputCharacter
-!
-
-unicodeMarker
-
-	^$u asParser plus
-! !
-
-!PPJavaLexicon methodsFor:'grammar-whiteSpace'!
-
-whiteSpace
-
-	^ (Character space asParser ) /
-	  (Character tab asParser ) /
-	  ((Character value: 12) asParser ) /
-		lineTerminator 
-! !
-
-!PPJavaLexicon methodsFor:'initialization'!
-
-initialize
-
-	super initialize.
-	
-	self initializeKeywords.
-	self initializeOperators.
-	self initializeSeparators.
-!
-
-initializeKeywords
-
-	| values |
-	keywords := Dictionary new.
-	values := #('abstract' 'assert' 'boolean' 'break' 'byte' 'case'  'catch' 'char' 'class' 'const'
-	   'continue' 'default' 'do' 'double' 'else' 'enum' 'extends' 'final'  'finally' 'float'
-	   'for' 'if' 'goto' 'implements' 'import' 'instanceof' 'int' 'interface' 'long' 'native'
-	   'new' 'package' 'private' 'protected' 'public' 'return' 'short' 'static' 'strictfp' 'super'
-	   'switch' 'synchronized' 'this' 'throw' 'throws' 'transient' 'try' 'void' 'volatile' 'while').
-	
-	values do: [:eachKeyword |
-		keywords at: eachKeyword 
-			put: (PPUnresolvedParser named: ('keyword', eachKeyword first asUppercase asString , eachKeyword allButFirst))		
-		].
-	
-	keywords keysAndValuesDo:  [:key :value |
-		(keywords at: key) def: (key asParser ,  #word asParser not)]
-!
-
-initializeOperators
-
-	| values |
-	operators := Dictionary new.
-	values := #(	'>>>=' '>>>' '>>=' '>>' '>=' '>'	'<<=' '<<' '<=' '<'	'++' '+=' '+'	'--' '-=' '-'	'&&' '&=' '&'
-					'||' '|=' '|'	'*=' '*'	'%=' '%'	'/=' '/'	'^=' '^'	'!!=' '!!'	'==' '='	'~'	'?'	':'	'@' ).
-	" @ ? perhaps for annotation but not in the doc "
-	values do: [:eachOperator |
-		operators at: eachOperator 
-			put: (PPUnresolvedParser named: ('operator', eachOperator asString))		
-		].
-	
-	operators  keysAndValuesDo:  [:key :value |
-		(operators at: key) def: (key asParser)]
-!
-
-initializeSeparators
-
-	| values |
-	separators := Dictionary new.
-	values := #( '(' ')' '{' '}' '[' ']' ';' ',' '.' ).
-	
-	values do: [:eachSeparator |
-		separators at: eachSeparator 
-			put: (PPUnresolvedParser named: ('separator', eachSeparator asString))		
-		].
-	
-	separators  keysAndValuesDo:  [:key :value |
-		(separators at: key) def: (key asParser)]
-! !
-
-!PPJavaLexicon methodsFor:'utility'!
-
-asToken: aParser
-
-	^aParser javaToken
-!
-
-emptySquaredParenthesis
-
-	^ self asToken: (((self tokenFor: '['), (self tokenFor: ']')))
-!
-
-tokenFor: aString
-
-	^self asToken: (keywords at: aString 
-						ifAbsent: [separators at: aString 
-							ifAbsent: [operators at: aString] ])
-! !
-
-!PPJavaLexicon class methodsFor:'documentation'!
-
-version_HG
-
-    ^ '$Changeset: <not expanded> $'
-! !
-
+"{ Package: 'stx:goodies/petitparser/parsers/java' }"

"{ NameSpace: Smalltalk }"

PPCompositeParser subclass:#PPJavaLexicon
	instanceVariableNames:'unicodeEscape rawInputCharacter unicodeMarker hexDigit
		lineTerminator unicodeInputCharacter inputElements sub
		inputElement whiteSpace comment javaToken keyword literal
		separator operator identifier traditionalComment endOfLineComment
		commentTail charactersInLine commentTailStar notStar
		notStarNotSlash inputCharacter booleanLiteral nullLiteral
		identifierChars javaLetter javaLetterOrDigit keywords
		floatingPointLiteral integerLiteral characterLiteral
		stringLiteral hexIntegerLiteral octalIntegerLiteral
		decimalIntegerLiteral decimalNumeral integerTypeSuffix hexNumeral
		octalNumeral nonZeroDigit digits hexDigits octalDigits octalDigit
		hexadecimalFloatingPointLiteral decimalFloatingPointLiteral
		exponentPart floatTypeSuffix exponentIndicator signedInteger sign
		hexSignificand binaryExponent binaryExponentIndicator
		escapeSequence singleCharacter stringCharacters stringCharacter
		octalEscape zeroToThree input operators separators trueToken
		falseToken nullToken'
	classVariableNames:''
	poolDictionaries:''
	category:'PetitJava-Core'
!

PPJavaLexicon comment:'A parser with a definitions for some basic Java gramar parts

Grammar rules follow as closely as possible the specification found in "The Java Language Specification Third Edition"

URL = '
!


!PPJavaLexicon class methodsFor:'accessing'!

ignoredNames
	"Answer a collection of instance-variables that should not be automatically initialized with productions, but that are used internal to the composite parser."

	| newArray |	
	newArray := Array new: ((self namesToIgnore size) + (super ignoredNames size)).
	newArray
		replaceFrom: 1
		to: self namesToIgnore size
		with: self namesToIgnore.
	newArray
		replaceFrom: (self namesToIgnore size + 1)
		to: newArray size
		with: super ignoredNames.	
	^newArray
!

namesToIgnore

	^#('keywords' 'operators' 'separators')
! !

!PPJavaLexicon methodsFor:'accessing'!

start
	"Default start production."

	^ input end
! !

!PPJavaLexicon methodsFor:'grammar-comments'!

charactersInLine   

	^ inputCharacter plus
!

comment
	"traditional -> /*
	 endOfLine -> //"
	^ traditionalComment / endOfLineComment
!

commentTail

	^ 	('*' asParser , commentTailStar ) /
		(notStar , commentTail)
!

commentTailStar 

	^ ('/' asParser ) /
	  ('*' asParser , commentTailStar ) /
	  (notStarNotSlash , commentTail )
!

endOfLineComment 

	^ '//' asParser , charactersInLine optional
!

notStar

	^  ('*' asParser not , inputCharacter)/lineTerminator
!

notStarNotSlash  

	^ lineTerminator / ((PPPredicateObjectParser anyOf: '*/') not , inputCharacter )
!

traditionalComment

	^ '/*' asParser , commentTail
! !

!PPJavaLexicon methodsFor:'grammar-identifiers'!

identifier 

	^  self asToken: (((keyword not) , (booleanLiteral not) , (nullLiteral not) , identifierChars ))
!

identifierChars
	
	^ javaLetter plus , javaLetterOrDigit star
!

javaLetter

	^ (#letter asParser) / (PPPredicateObjectParser anyOf: '_$')
!

javaLetterOrDigit

	^ javaLetter / (#digit asParser)
! !

!PPJavaLexicon methodsFor:'grammar-input'!

input

	^ (inputElements optional) , (sub optional)
!

inputElement

	^ whiteSpace / comment / javaToken
!

inputElements

	^ inputElement plus
!

javaToken


	^ identifier / keyword / literal / separator / operator
!

sub

	^ (Character value: 26) asParser 
! !

!PPJavaLexicon methodsFor:'grammar-keywords'!

keyword

        | keywordParsers |
        
        keywordParsers := keywords keys asSortedCollection collect: [:eachKey | keywords at: eachKey ].
        ^ self asToken: ( (keywordParsers reduce: [ :a :b | a / b ]) )

    "Modified (format): / 21-04-2015 / 15:27:29 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!PPJavaLexicon methodsFor:'grammar-lineTerminators'!

inputCharacter 

	^(lineTerminator not) , unicodeInputCharacter ==> #second
!

lineTerminator

    self flag: 'Hack alert - should be fixed immediately in PJTraditionalCommentsNode>>comment:'.

        ^ (((Character codePoint: 10) asParser) ==> [ :lf | Array with: lf with: nil ])
          / (((Character codePoint: 13) asParser , ((Character codePoint: 10) asParser ) optional )) ==> [ :nodes | Array with: nodes first with: nil ]

    "Modified: / 21-04-2015 / 17:16:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!PPJavaLexicon methodsFor:'grammar-literals'!

literal
	"a literal must be a single token. Whitespaces are not allowed inside the literal"
	
	^ nullLiteral / booleanLiteral / floatingPointLiteral / integerLiteral / characterLiteral / stringLiteral
! !

!PPJavaLexicon methodsFor:'grammar-literals-boolean'!

booleanLiteral 

 ^ trueToken / falseToken
!

falseToken
	^ ('false' asParser , #word asParser not) javaToken
!

nullToken
	^ ('null' asParser , #word asParser not) javaToken
!

trueToken
	^ ('true' asParser , #word asParser not) javaToken
! !

!PPJavaLexicon methodsFor:'grammar-literals-character'!

characterLiteral 

 ^ ($' asParser , ( escapeSequence / singleCharacter ), $' asParser) javaToken
!

singleCharacter 	

	^( PPPredicateObjectParser anyOf: '''\') not , inputCharacter ==> #second
! !

!PPJavaLexicon methodsFor:'grammar-literals-escape'!

escapeSequence 

	^ ($\ asParser , (PPPredicateObjectParser anyOf: 'btnfr""''\' ) ) /
	   octalEscape 
!

octalEscape

	^ $\ asParser , ( (zeroToThree , octalDigit , octalDigit) / (octalDigit , octalDigit optional) )
!

zeroToThree

	^PPPredicateObjectParser anyOf: '0123'
! !

!PPJavaLexicon methodsFor:'grammar-literals-floating'!

binaryExponent

 ^ binaryExponentIndicator , signedInteger
!

binaryExponentIndicator

  ^ PPPredicateObjectParser anyOf: 'pP'
!

decimalFloatingPointLiteral

	|dot|
	dot := $. asParser.

 ^ ( ( (dot , digits) 
        / 
        (digits , dot , digits optional)) , 
			exponentPart optional , floatTypeSuffix optional ) 
  	/ 
  	(digits , 
		( (exponentPart , floatTypeSuffix optional) 
		  /
		  (exponentPart optional , floatTypeSuffix) ))
!

exponentIndicator

  ^ PPPredicateObjectParser anyOf: 'eE'
!

exponentPart

 ^ exponentIndicator , signedInteger
!

floatTypeSuffix

	^ PPPredicateObjectParser anyOf: 'fFdD'
!

floatingPointLiteral

  ^ (hexadecimalFloatingPointLiteral / decimalFloatingPointLiteral) javaToken
!

hexSignificand 
	|dot|
	dot := $. asParser.

 ^  (hexNumeral , dot optional) /
    ($0 asParser , (PPPredicateObjectParser anyOf: 'xX') , hexDigits optional , dot , hexDigits )
!

hexadecimalFloatingPointLiteral

 ^ hexSignificand , binaryExponent , floatTypeSuffix optional
!

sign

  ^PPPredicateObjectParser anyOf: '-+'
!

signedInteger

  ^ sign optional , digits
! !

!PPJavaLexicon methodsFor:'grammar-literals-integer'!

decimalIntegerLiteral

 ^ decimalNumeral , (integerTypeSuffix optional)
!

decimalNumeral 

	^($0 asParser) / (nonZeroDigit , digits optional) 
!

digits 
	"digit is already defined, no need to redefine it"
	^#digit asParser plus
!

hexDigits 

	^hexDigit plus
!

hexIntegerLiteral 

  ^ hexNumeral , (integerTypeSuffix optional)
!

hexNumeral 

	^$0 asParser, (PPPredicateObjectParser anyOf: 'xX' ), hexDigits
!

integerLiteral

  ^ (hexIntegerLiteral / octalIntegerLiteral / decimalIntegerLiteral) javaToken
!

integerTypeSuffix

	^ PPPredicateObjectParser anyOf: 'lL'
!

nonZeroDigit 

	^PPPredicateObjectParser anyOf: '123456789'.
!

octalDigit 

	^PPPredicateObjectParser anyOf: '01234567'
!

octalDigits

	^ octalDigit plus
!

octalIntegerLiteral 

 ^ octalNumeral , (integerTypeSuffix optional)
!

octalNumeral 

	^($0 asParser) , octalDigits
! !

!PPJavaLexicon methodsFor:'grammar-literals-null'!

nullLiteral 

 ^ nullToken
! !

!PPJavaLexicon methodsFor:'grammar-literals-string'!

stringCharacter
		
	^ ( ( PPPredicateObjectParser anyOf: '"\') not , inputCharacter ==> #second ) /
	   escapeSequence 
!

stringCharacters

	^ stringCharacter plus
!

stringLiteral 

 ^ ($" asParser , stringCharacters optional , $" asParser) javaToken
! !

!PPJavaLexicon methodsFor:'grammar-operators'!

operator
        | operatorParsers |
        
        operatorParsers := operators keys asSortedCollection collect: [:eachKey | operators at: eachKey ].                                                
        ^self asToken:  (operatorParsers reduce: [ :a :b | a / b ])

    "Modified: / 21-04-2015 / 15:26:21 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!PPJavaLexicon methodsFor:'grammar-separators'!

separator	
	^self asToken: (PPPredicateObjectParser anyOf: '(){}[];,.' )
! !

!PPJavaLexicon methodsFor:'grammar-unicode-escapes'!

hexDigit 

	^#hex asParser
!

rawInputCharacter

	^#any asParser
!

unicodeEscape

	^ $\ asParser , unicodeMarker , hexDigit , hexDigit , hexDigit , hexDigit
!

unicodeInputCharacter
	 ^ unicodeEscape / rawInputCharacter
!

unicodeMarker

	^$u asParser plus
! !

!PPJavaLexicon methodsFor:'grammar-whiteSpace'!

whiteSpace

	^ (Character space asParser ) /
	  (Character tab asParser ) /
	  ((Character value: 12) asParser ) /
		lineTerminator 
! !

!PPJavaLexicon methodsFor:'initialization'!

initialize

	super initialize.
	
	self initializeKeywords.
	self initializeOperators.
	self initializeSeparators.
!

initializeKeywords

	| values |
	keywords := Dictionary new.
	values := #('abstract' 'assert' 'boolean' 'break' 'byte' 'case'  'catch' 'char' 'class' 'const'
	   'continue' 'default' 'do' 'double' 'else' 'enum' 'extends' 'final'  'finally' 'float'
	   'for' 'if' 'goto' 'implements' 'import' 'instanceof' 'int' 'interface' 'long' 'native'
	   'new' 'package' 'private' 'protected' 'public' 'return' 'short' 'static' 'strictfp' 'super'
	   'switch' 'synchronized' 'this' 'throw' 'throws' 'transient' 'try' 'void' 'volatile' 'while').
	
	values do: [:eachKeyword |
		keywords at: eachKeyword 
			put: (PPUnresolvedParser named: ('keyword', eachKeyword first asUppercase asString , eachKeyword allButFirst))		
		].
	
	keywords keysAndValuesDo:  [:key :value |
		(keywords at: key) def: (key asParser ,  #word asParser not)]
!

initializeOperators

	| values |
	operators := Dictionary new.
	values := #(	'>>>=' '>>>' '>>=' '>>' '>=' '>'	'<<=' '<<' '<=' '<'	'++' '+=' '+'	'--' '-=' '-'	'&&' '&=' '&'
					'||' '|=' '|'	'*=' '*'	'%=' '%'	'/=' '/'	'^=' '^'	'!!=' '!!'	'==' '='	'~'	'?'	':'	'@' ).
	" @ ? perhaps for annotation but not in the doc "
	values do: [:eachOperator |
		operators at: eachOperator 
			put: (PPUnresolvedParser named: ('operator', eachOperator asString))		
		].
	
	operators  keysAndValuesDo:  [:key :value |
		(operators at: key) def: (key asParser)]
!

initializeSeparators

	| values |
	separators := Dictionary new.
	values := #( '(' ')' '{' '}' '[' ']' ';' ',' '.' ).
	
	values do: [:eachSeparator |
		separators at: eachSeparator 
			put: (PPUnresolvedParser named: ('separator', eachSeparator asString))		
		].
	
	separators  keysAndValuesDo:  [:key :value |
		(separators at: key) def: (key asParser)]
! !

!PPJavaLexicon methodsFor:'utility'!

asToken: aParser

	^aParser javaToken
!

emptySquaredParenthesis

	^ self asToken: (((self tokenFor: '['), (self tokenFor: ']')))
!

tokenFor: aString

	^self asToken: (keywords at: aString 
						ifAbsent: [separators at: aString 
							ifAbsent: [operators at: aString] ])
! !

!PPJavaLexicon class methodsFor:'documentation'!

version_HG

    ^ '$Changeset: <not expanded> $'
! !
\ No newline at end of file
--- a/parsers/java/PPJavaLexiconTest.st	Tue Apr 21 17:06:24 2015 +0100
+++ b/parsers/java/PPJavaLexiconTest.st	Tue Apr 21 17:20:11 2015 +0100
@@ -80,6 +80,33 @@
 		rule: #comment
 !
 
+testComment8cr
+
+        self 
+                parse: '/*',(Character codePoint: 13),'*/'
+                rule: #comment
+
+    "Created: / 21-04-2015 / 17:07:40 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+testComment8crlf
+
+        self 
+                parse: '/*',(Character codePoint: 13),(Character codePoint: 10),'*/'
+                rule: #comment
+
+    "Created: / 21-04-2015 / 17:07:58 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+testComment8lf
+
+        self 
+                parse: '/*',(Character codePoint: 10),'*/'
+                rule: #comment
+
+    "Created: / 21-04-2015 / 17:07:46 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
 testTraditionalComment1
 
 	self 
--- a/parsers/java/PPJavaParserTest.st	Tue Apr 21 17:06:24 2015 +0100
+++ b/parsers/java/PPJavaParserTest.st	Tue Apr 21 17:20:11 2015 +0100
@@ -94,6 +94,30 @@
 	self assert: (result comment) equals: '*     * @param args      * @return void     '.
 !
 
+testComment8cr
+        super testComment8cr.
+        self assert: result class equals: PJTraditionalCommentsNode .
+        self assert: (result comment) equals: ''.
+
+    "Created: / 21-04-2015 / 17:08:31 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+testComment8crlf
+        super testComment8crlf.
+        self assert: result class equals: PJTraditionalCommentsNode .
+        self assert: (result comment) equals: ''.
+
+    "Created: / 21-04-2015 / 17:08:37 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
+testComment8lf
+        super testComment8lf.
+        self assert: result class equals: PJTraditionalCommentsNode .
+        self assert: (result comment) equals: ''.
+
+    "Created: / 21-04-2015 / 17:08:44 / Jan Vrany <jan.vrany@fit.cvut.cz>"
+!
+
 testSeparator1
 super testSeparator1.
 	self assert: result class equals: PJSeparatorNode .