compiler/tests/PEGFsaScannerIntegrationTest.st
changeset 515 b5316ef15274
parent 502 1e45d3c96ec5
child 524 f6f68d32de73
--- a/compiler/tests/PEGFsaScannerIntegrationTest.st	Fri Jul 24 15:06:54 2015 +0100
+++ b/compiler/tests/PEGFsaScannerIntegrationTest.st	Mon Aug 17 12:13:16 2015 +0100
@@ -3,7 +3,7 @@
 "{ NameSpace: Smalltalk }"
 
 TestCase subclass:#PEGFsaScannerIntegrationTest
-	instanceVariableNames:'fsa fsaGenerator parser scanner result compiled'
+	instanceVariableNames:'fsa fsaGenerator parser scanner result compiled parser1 parser2'
 	classVariableNames:''
 	poolDictionaries:''
 	category:'PetitCompiler-Tests-Scanner'
@@ -11,66 +11,6 @@
 
 !PEGFsaScannerIntegrationTest methodsFor:'as yet unclassified'!
 
-compile
-    | ppcTree |
-    compiled ifTrue: [ ^ self ].
-    ppcTree := parser asCompilerTree.
-    fsa := ppcTree asFsa.
-    fsa name: #nextToken.
-    fsa finalStates do: [ :s | s isFailure ifFalse: [s retval: #token ]].
-
-    scanner := ((PPCScannerCodeGenerator new)
-        generate: fsa).
-
-    compiled := true
-!
-
-failScan: stream
-    self compile.
-
-    scanner initialize.
-    scanner stream: stream asPetitStream. 
-    result := scanner nextToken.
-        
-    self assert: result isEmpty
-!
-
-scan: stream token: token
-    self scan: stream token: token position: stream size.
-!
-
-scan: stream token: token position: position
-    self compile.
-
-    scanner initialize.
-    scanner stream: stream asPetitStream. 
-    result := scanner nextToken.
-        
-    self assert: result isCollection description: 'no collection returned as a result!!'.
-    self assert: (result isEmpty not) description: 'no token found'.
-    self assert: (result at: token) = position.
-!
-
-setUp
-    compiled := false.
-    fsaGenerator := PEGFsaGenerator new.
-!
-
-testA
-    parser := 'a' asParser.
-    
-    self compile.
-
-    self assert: fsa isDeterministic.
-    self assert: fsa isWithoutEpsilons.	
-    
-    self failScan: ''.
-    self failScan: 'b'.
-
-    self scan: 'a' token: #token position: 1.
-    self scan: 'aaa' token: #token position: 1.
-!
-
 testAAA_Aplusnot
     parser := 'aaa' asParser not, $a asParser plus.
     self compile.
@@ -85,6 +25,39 @@
     self failScan: 'aaa'.
     self failScan: 'aaaa'.
     self failScan: 'aaaaa'.
+! !
+
+!PEGFsaScannerIntegrationTest methodsFor:'distinct'!
+
+testAAAnot_Aplus
+    parser := 'aaa' asParser not, $a asParser plus.
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    self assert: fsa hasDistinctRetvals.
+
+    self scan: 'a' token: #token.
+    self scan: 'aa' token: #token.
+
+    self failScan: ''.
+    self failScan: 'aaa'.
+    self failScan: 'aaaa'.
+    self failScan: 'aaaaa'.
+!
+
+testAAAstar_AA
+    parser := 'aaa' asParser star, 'aa' asParser.
+    
+    self scan: 'aa' token: #token.
+    self scan: 'aaaaa' token: #token.
+    self scan: 'aaaaaaaa' token: #token.
+
+
+    self failScan: 'a'.
+    self failScan: 'aaa'.
+    self failScan: 'aaaa'.
+    self failScan: 'aaaaaaa'.
 !
 
 testAAplus_A
@@ -150,6 +123,35 @@
     self scan: 'aba' token: #token position: 2.
 !
 
+testAXorAXXstar_X
+    parser := ('ax' asParser / 'axx' asParser) plus, 'x' asParser.
+    
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self failScan: 'x'.
+    self failScan: ''.
+
+    self scan: 'axx' token: #token position: 3.
+!
+
+testAXorA_X
+    parser := ('ax' asParser / 'a' asParser), $x asParser.
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self scan: 'axx' token: #token.
+
+    self failScan: 'ax'.
+    self failScan: 'ab'.
+    self failScan: 'x'.
+    self failScan: ''.		
+!
+
 testA_BCorCD_D
     parser := $a asParser, ('bc' asParser / 'cd' asParser), $d asParser.
     
@@ -194,7 +196,8 @@
 
     self assert: fsa isDeterministic.
     self assert: fsa isWithoutEpsilons.	
-    
+    self assert: fsa hasDistinctRetvals.
+
     self failScan: 'ab'.
     self failScan: 'bb'.
 
@@ -273,7 +276,6 @@
 
 testAorAX_X
     parser := ('a' asParser / 'ax' asParser), $x asParser.
-    
     self compile.
 
     self assert: fsa isDeterministic.
@@ -302,6 +304,20 @@
     self scan: 'bb' token: #token position: 1.
 !
 
+testAorEOF
+    parser := $a asParser / #eof asParser.
+    
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+
+    self scan: 'a' token: #token position: 1.
+    self scan: '' token: #token position: 0.
+    
+    self failScan: 'b'.
+!
+
 testAplus_B
     parser := $a asParser plus, $b asParser.
     
@@ -339,14 +355,18 @@
 
     self assert: fsa isDeterministic.
     self assert: fsa isWithoutEpsilons.	
-    
+    self assert: fsa hasDistinctRetvals.
+
+    self failScan: 'b'.
+    self failScan: 'ab'.
     self failScan: 'aaab'.
-    self failScan: 'b'.
 
-    self scan: '' token: #token position: 0.
-    self scan: 'a' token: #token position: 1.
-    self scan: 'aac' token: #token position: 2.
-    self scan: 'aaaac' token: #token position: 4.
+    self scan: '' token: #token.
+    self scan: 'a' token: #token.
+    self scan: 'aaa' token: #token.
+    self scan: 'c' token: #token position: 0.
+    self scan: 'ac' token: #token position: 1.
+    self scan: 'aaac' token: #token position: 3.
 !
 
 testFoo
@@ -374,10 +394,26 @@
     self scan: '2312' token: #token position: 4.
 !
 
+testRecursive
+    parser := PPDelegateParser new.
+    
+    parser setParser: ($a asParser, parser) / $b asParser.
+    
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self failScan: 'c'.
+
+    self scan: 'b' token: #token.
+    self scan: 'ab' token: #token.
+    self scan: 'aaaaab' token: #token.
+!
+
 testSmalltalkIdentifier
     parser := #letter asParser, #word asParser star, $: asParser not.
     self compile.
-
     self assert: fsa isDeterministic.
     self assert: fsa isWithoutEpsilons.	
 
@@ -390,3 +426,264 @@
     self failScan: '123'.	
 ! !
 
+!PEGFsaScannerIntegrationTest methodsFor:'multivalues'!
+
+testA
+    parser1 := 'a' asParser.
+    parser2 := 'a' asParser.
+    
+    self compileMerge.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    self assert: fsa hasDistinctRetvals not.
+    
+    self failScan: ''.
+    self failScan: 'b'.
+
+    self scan: 'a' token: #token1 position: 1.
+    self scan: 'a' token: #token2 position: 1.
+    self scan: 'aaa' token: #token1 position: 1.
+    self scan: 'aaa' token: #token2 position: 1.
+!
+
+testAplus_BOrAplus_Bnot
+    parser1 := $a asParser plus, $b asParser.
+    parser2 := $a asParser plus, $b asParser not.
+
+    self compileMerge.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+            
+    self failScan: 'aaa' token: #token1.
+    self scan: 'aaa' token: #token2 position: 3.
+
+    self scan: 'aaab' token: #token1 position: 4.
+    self failScan: 'aaab' token: #token2.
+!
+
+testAuorAplus
+    parser1 := 'a' asParser.
+    parser2 := 'a' asParser plus.
+    
+    self compileMerge.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    self assert: fsa hasDistinctRetvals not.
+    
+    self failScan: 'b' token: #token1.
+    self failScan: 'b' token: #token2.
+
+    self failScan: '' token: #token1.
+    self failScan: '' token: #token2.
+    
+    self scan: 'a' token: #token1 position: 1.
+    self scan: 'a' token: #token2 position: 1.
+
+    self scan: 'aaa' token: #token1 position: 1.
+    self scan: 'aaa' token: #token2 position: 3.
+!
+
+testKeywordOrUnary
+    parser1 := #letter asParser plus, $: asParser.
+    parser2 := #letter asParser plus, $: asParser not.
+    
+    self compileMerge.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+            
+    self failScan: 'false' token: #token1.
+    self scan: 'false' token: #token2 position: 5.
+
+    self scan: 'false:' token: #token1 position: 6.
+    self failScan: 'false:' token: #token2.
+!
+
+testTrueOrId
+    parser1 := 'true' asParser.
+    parser2 := #letter asParser plus.
+    
+    self compileMerge.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    self assert: fsa hasDistinctRetvals not.
+    
+    self failScan: 'false' token: #token1.
+    self scan: 'false' token: #token2 position: 5.
+
+    self scan: 'true' token: #token1 position: 4.
+    self scan: 'true' token: #token2 position: 4.
+    
+    self scan: 'truecrypt' token: #token1 position: 4.
+    self scan: 'truecrypt' token: #token2 position: 9.
+    
+! !
+
+!PEGFsaScannerIntegrationTest methodsFor:'smalltalk'!
+
+testStIdentifier
+    parser := (PPPredicateObjectParser
+                on: [ :each | each isLetter or: [ each = $_ ] ]
+                message: 'letter expected') ,
+            (PPPredicateObjectParser
+                on: [ :each | each isAlphaNumeric or: [ each = $_ ] ]
+                message: 'letter or digit expected') star.
+
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self failScan: ''.
+    self failScan: '23ab'.
+
+    self scan: 'fooBar' token: #token.
+    self scan: 'foo_bar' token: #token.
+!
+
+testStKeyword
+    | identifier |
+    identifier := (PPPredicateObjectParser
+                on: [ :each | each isLetter or: [ each = $_ ] ]
+                message: 'letter expected') ,
+            (PPPredicateObjectParser
+                on: [ :each | each isAlphaNumeric or: [ each = $_ ] ]
+                message: 'letter or digit expected') star.
+    parser := identifier, $: asParser.
+
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self failScan: 'fooBar'.
+
+    
+    self scan: 'fooBar:' token: #token.
+    self scan: 'foo_bar:' token: #token.
+!
+
+testStString
+    parser := $' asParser , ('''''' asParser / $' asParser negate) star , $' asParser.
+
+    self compile.
+
+    self assert: fsa isDeterministic.
+    self assert: fsa isWithoutEpsilons.	
+    
+    self failScan: ''.
+    self failScan: 'b'.
+
+    self scan: '''hi there''' token: #token.
+! !
+
+!PEGFsaScannerIntegrationTest methodsFor:'support'!
+
+compile
+    | ppcTree |
+    compiled ifTrue: [ ^ self ].
+
+    ppcTree := parser asCompilerTree.
+    fsa := ppcTree asFsa.
+    fsa retval: #token.
+    fsa determinize.
+    
+    self generate
+!
+
+compileMerge
+    | ppcTree1 ppcTree2 fsa1 fsa2 |
+    compiled ifTrue: [ ^ self ].
+
+    ppcTree1 := parser1 asCompilerTree.
+    ppcTree2 := parser2 asCompilerTree.
+    
+    fsa1 := ppcTree1 asFsa.
+    fsa1 retval: #token1.
+    fsa2 := ppcTree2 asFsa.
+    fsa2 retval: #token2.
+    
+    fsa := self mergeFsa: fsa1 and: fsa2.
+    
+    self generate.
+!
+
+failScan: stream
+    self compile.
+
+    scanner initialize.
+    scanner stream: stream asPetitStream. 
+    scanner nextToken.
+    
+    result := scanner polyResult.
+    
+        
+    self assert: result isEmpty
+!
+
+failScan: stream token: token
+    self compile.
+
+    scanner initialize.
+    scanner stream: stream asPetitStream. 
+    scanner nextToken.
+    
+    result := scanner polyResult.
+    
+        
+    self assert: ((result includesKey: token) not)
+!
+
+generate
+    fsa name: #nextToken.
+    
+    scanner := ((PPCScannerCodeGenerator new)
+        generateAndCompile: fsa).
+
+    compiled := true
+!
+
+mergeFsa: fsa1 and: fsa2
+    | startState |
+    fsa := PEGFsa new.
+    startState := PEGFsaState new.
+    
+    fsa addState: startState.
+    fsa startState: startState.
+
+    fsa adopt: fsa1.
+    fsa addTransitionFrom: startState to: fsa1 startState.
+
+    fsa adopt: fsa2.
+    fsa addTransitionFrom: startState to: fsa2 startState.
+    
+    fsa determinizeStandard.
+    ^ fsa
+!
+
+scan: stream token: token
+    self scan: stream token: token position: stream size.
+!
+
+scan: stream token: token position: position
+    self compile.
+
+    scanner stream: stream asPetitStream. 
+    scanner nextToken.
+
+    result := scanner polyResult.
+        
+    self assert: result isCollection description: 'no collection returned as a result!!'.
+    self assert: (result isEmpty not) description: 'no token found'.
+    self assert: (result at: token) = position.
+!
+
+setUp
+    compiled := false.
+    fsaGenerator := PEGFsaGenerator new.
+! !
+