compiler/PPCTokenCodeGenerator.st
changeset 515 b5316ef15274
parent 502 1e45d3c96ec5
child 516 3b81c9e53352
child 524 f6f68d32de73
--- a/compiler/PPCTokenCodeGenerator.st	Fri Jul 24 15:06:54 2015 +0100
+++ b/compiler/PPCTokenCodeGenerator.st	Mon Aug 17 12:13:16 2015 +0100
@@ -2,88 +2,281 @@
 
 "{ NameSpace: Smalltalk }"
 
-PPCCodeGenerator subclass:#PPCTokenCodeGenerator
-	instanceVariableNames:''
+PPCNodeVisitor subclass:#PPCTokenCodeGenerator
+	instanceVariableNames:'compiler scannerGenerator fsaCache'
 	classVariableNames:''
 	poolDictionaries:''
 	category:'PetitCompiler-Visitors'
 !
 
-!PPCTokenCodeGenerator methodsFor:'as yet unclassified'!
+!PPCTokenCodeGenerator methodsFor:'accessing'!
+
+arguments: args
+    super arguments: args.
+    scannerGenerator arguments: args
+!
+
+compiler
+    ^ compiler
+!
+
+compiler: anObject
+    compiler := anObject.
+    
+    scannerGenerator compiler idGen: compiler idGen.
+! !
+
+!PPCTokenCodeGenerator methodsFor:'code support'!
+
+consumeWhitespace: node
+    self assert: node isTokenNode.
+
+    node isTrimmingTokenNode ifTrue: [ 
+        compiler code: 'self consumeWhitespace.'
+    ]
+!
 
-afterAccept: node retval: retval
-    | return |
-    return := super afterAccept: node retval: retval.
-    return category: 'generated - tokens'.
-    ^ return
+createTokenInsance: node id: idCode start: startVar end: endVar
+    compiler codeTranscriptShow: 'current token type: ', idCode.
+    compiler codeAssign: idCode, '.' to: 'currentTokenType'.
+    compiler codeAssign: node tokenClass asString, ' on: (context collection) 
+                                                            start: ', startVar, ' 
+                                                            stop: ', endVar, '
+                                                            value: nil.'
+               to: 'currentTokenValue'.
+!
+
+scan: node start: startVar end: endVar
+    node child hasName ifFalse: [ 
+        node child name: node name
+    ].
+
+    compiler codeAssign: 'context position + 1.' to: startVar.
+    compiler add: ((self generateScan: node child) callOn: 'scanner').
+!
+
+unorderedChoiceFromFollowSet: followSet
+    | followFsas  |
+    
+    ^ fsaCache at: followSet ifAbsentPut: [ 
+        followFsas := followSet collect: [ :followNode | 
+                (followNode asFsa) 
+                    name: (compiler idFor: followNode);
+                    retval: (compiler idFor: followNode); 
+                    yourself
+        ].
+        self unorderedChoiceFromFsas: followFsas.
+    ]
+     
 !
 
-fromTokenMode
-    compiler rememberStrategy: (PPCCompilerTokenizingRememberStrategy on: compiler).
-    compiler errorStrategy: (PPCCompilerTokenizingErrorStrategy on: compiler).
+unorderedChoiceFromFsas: fsas
+    | result startState |
+    result := PEGFsa new.
+    startState := PEGFsaState new.
+    
+    result addState: startState.
+    result startState: startState.
+
+    fsas do: [ :fsa | 
+        result adopt: fsa.
+        result addTransitionFrom: startState to: fsa startState.
+    ].
+
+    result determinizeStandard.
+    ^ result
+! !
+
+!PPCTokenCodeGenerator methodsFor:'compiling support'!
+
+compileScanner
+    ^ scannerGenerator compileScannerClass
+!
+
+retvalVar
+    ^ compiler currentReturnVariable
+!
+
+startMethodForNode:node
+    node isMarkedForInline ifTrue:[ 
+        compiler startInline: (compiler idFor: node).
+        compiler codeComment: 'BEGIN inlined code of ' , node printString.
+        compiler indent.
+    ] ifFalse:[ 
+        compiler startMethod: (compiler idFor: node).
+        compiler currentMethod category: 'generated - tokens'.
+        compiler codeComment: 'GENERATED by ' , node printString.
+        compiler allocateReturnVariable.
+    ]
 !
 
-toTokenMode
-    compiler rememberStrategy: (PPCCompilerTokenRememberStrategy on: compiler).	
-    compiler errorStrategy: (PPCCompilerTokenErrorStrategy on: compiler).
+stopMethodForNode:aPPCNode
+    ^ aPPCNode isMarkedForInline ifTrue:[ 
+                compiler dedent.
+                compiler add: '"END inlined code of ' , aPPCNode printString , '"'.
+                compiler stopInline.
+    ] ifFalse:[ 
+                compiler stopMethod
+    ].
+! !
+
+!PPCTokenCodeGenerator methodsFor:'initialization'!
+
+initialize
+    super initialize.
+    
+    scannerGenerator := PPCScannerCodeGenerator new.
+    scannerGenerator arguments: arguments.
+    
+    "for the given set of nodes, remember the unordered choice fsa
+        see `unorderedChoiceFromFollowSet:`
+    "
+    fsaCache := Dictionary new.
+! !
+
+!PPCTokenCodeGenerator methodsFor:'scanning'!
+
+generateNextScan: node
+    | epsilon followSet  anFsa |
+    followSet := node followSetWithTokens.
+    
+    epsilon := followSet anySatisfy: [ :e | e acceptsEpsilon ].
+    followSet := followSet reject: [ :e | e acceptsEpsilon ].
+    epsilon ifTrue: [ followSet add: PPCEndOfFileNode instance ].
+    
+    anFsa := self unorderedChoiceFromFollowSet: followSet.
+
+    anFsa name: 'nextToken_', (compiler idFor: node).
+    node nextFsa: anFsa.
+    ^ scannerGenerator generate: anFsa.
+!
+
+generateScan: node
+    | anFsa |
+    anFsa := node asFsa determinize.
+    anFsa name: (compiler idFor: node).
+    anFsa retval: (compiler idFor: node).
+    
+    ^ scannerGenerator generate: anFsa.
 ! !
 
 !PPCTokenCodeGenerator methodsFor:'visiting'!
 
-visitOptionalNode: node
-    compiler 
-          codeAssignParsedValueOf:[ self visit:node child ]
-          to:self retvalVar.
-    compiler codeAssign: 'false.' to: 'error'.
-    compiler codeReturn.
-!
+visitToken: tokenNode
+    |  id  startVar endVar  numberId |
+    self startMethodForNode: tokenNode.
 
-visitTokenNode: node
-    | id startVar endVar  |
     "Tokens cannot be inlined, 
         - their result is true/false
         - the return value is always stored in currentTokenValue
         - the current token type is always stored in currentTokenType
     "
-    self assert: node isMarkedForInline not.	
+    self assert: tokenNode isMarkedForInline not.
     
     startVar := compiler allocateTemporaryVariableNamed: 'start'.
-    endVar := compiler allocateTemporaryVariableNamed: 'end'.
-
-    id := compiler idFor: node.
-    self toTokenMode.
-
-    compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.	
+    endVar := compiler allocateTemporaryVariableNamed:  'end'.
+    
+    id := compiler idFor: tokenNode.
+    numberId := compiler numberIdFor: id.
+    
+    compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
+    
+"	compiler codeComment: 'number for: ', id storeString, ' is: ', numberId storeString.
+    compiler codeIf: 'scanner match: ', numberId storeString then: [ 
+        compiler codeAssign: '(scanner resultPosition: ', numberId storeString, ').' to: endVar.
+        self createTokenInsance: tokenNode 
+                id: id storeString 
+                start: '(context position + 1)' 
+                end: endVar.
+        
+        compiler code: 'context position: ', endVar, '.'.
+        
+        self consumeWhitespace: tokenNode.
+        compiler codeReturn: 'true'.
+    ].
+    compiler codeIf: 'scanner backtracked not' then: [ 
+        compiler codeReturn: 'false'.
+    ].
+    compiler codeComment: 'No match, no fail, scanner does not know about this...'.	
+"
     compiler profileTokenRead: id.
     
-    node allNodes size > 2 ifTrue: [ 
-        self addGuard: node ifTrue: nil  ifFalse: [ compiler addOnLine: '^ false' ].
+"	self scan: tokenNode start: startVar end: endVar."
+    "	compiler add: 'self assert: scanner isSingleMatch.'."
+"	compiler codeIf: 'scanner match ' then: ["
+
+    tokenNode child hasName ifFalse: [ 
+        tokenNode child name: tokenNode name
     ].
 
+    compiler codeAssign: 'context position + 1.' to: startVar.
+    compiler codeIf: [ compiler code: ((self generateScan: tokenNode child) callOn: 'scanner') ] then: [ 
+        compiler add: 'context position: scanner resultPosition.'.
+        compiler codeAssign: 'context position.' to: endVar.	
+        self consumeWhitespace: tokenNode.
+        self createTokenInsance: tokenNode id: id storeString start: startVar end: endVar.
+        compiler codeReturn: 'true'.
+    ] else: [ 
+        compiler code: 'scanner backtrackDistinct.'.
+        compiler code: 'context position: ', startVar, ' - 1.'. 
+        compiler codeReturn: 'false'.
+    ].
     
-    compiler codeAssign: 'context position + 1.' to: startVar.
-    compiler codeAssignParsedValueOf:[ self visit:node child ] to:#whatever.
-    compiler add: 'error ifTrue: [ ^ error := false ].'.
+    ^ self stopMethodForNode: tokenNode
+!
 
-    compiler codeAssign: 'context position.' to: endVar.
+visitTokenConsumeNode: node
+    | id   nextScan |
+    self startMethodForNode: node.
+    id := (compiler idFor: node child).
+
+    compiler add: 'self ', id asString, ' ifTrue: ['.
+        compiler indent.
 
-    compiler codeTranscriptShow: 'current token type: ', id storeString.
-    compiler codeAssign: id storeString, '.' to: 'currentTokenType'.
-    compiler codeAssign: node tokenClass asString, ' on: (context collection) 
-                                                                start: ', startVar, '  
-                                                                stop: ', endVar, '
-                                                                value: nil.'
-                to: 'currentTokenValue := ', self retvalVar.
-    
+        nextScan := self generateNextScan: node.
+        
+        node nextFsa hasDistinctRetvals ifTrue: [ 
+            compiler codeAssign: 'currentTokenValue.' to: self retvalVar.
         
-    compiler codeClearError.
-    compiler add: '^ true'.
+            compiler add: (nextScan callOn: 'scanner'), '.'.
+            compiler codeIf: 'scanner match' then: [ 
+            compiler add: 'context position: scanner resultPosition.'.
+                self createTokenInsance: node child 
+                        id: 'scanner result' 
+                        start: 'scanner position + 1' 
+                        end: 'scanner resultPosition'.
+                self consumeWhitespace: node child.
+                compiler codeReturn.
+            ] else: [ 
+                compiler codeComment: 'Looks like there is an error on its way...'.
+                compiler code: 'context position: scanner position.'.
+                compiler codeAssign: 'nil.' to: 'currentTokenType'.
+                compiler codeReturn.
+            ]
 
-    self fromTokenMode.
+        ] ifFalse: [ 
+            compiler codeAssign: 'nil.' to: 'currentTokenType'.
+            compiler codeReturn: 'currentTokenValue'.
+        ].
+        compiler dedent.
+
+    "Token not found"
+    compiler add: '] ifFalse: ['.
+        compiler indent.
+        compiler codeError: id asString, ' expected'.
+        compiler dedent.
+    compiler add: '].'.
+
+    ^ self stopMethodForNode: node
+!
+
+visitTokenNode: node
+    ^ self visitToken: node
 !
 
 visitTrimmingTokenCharacterNode: node
     |  id     |
+    self startMethodForNode:node.
 
     "Tokens cannot be inlined, 
         - their result is true/false
@@ -93,82 +286,22 @@
     self assert: node isMarkedForInline not.
     
     id := compiler idFor: node.
-    self toTokenMode.
     
     compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
     compiler profileTokenRead: id.
 
-    self addGuard: node ifTrue: nil ifFalse: [ compiler addOnLine: ' ^ false' ].
-
+    compiler add: '(context peek == ', node child character storeString, ') ifFalse: [ ^ false ].'.
     compiler add: 'context next.'.
 
-    compiler codeTranscriptShow: 'current token type: ', id storeString.
-    compiler codeAssign: id storeString, '.' to: 'currentTokenType'.
-    compiler codeAssign: node tokenClass asString, ' on: (context collection) 
-                                                            start: context position 
-                                                            stop: context position
-                                                            value: nil.'
-               to: 'currentTokenValue := ', self retvalVar.
+    self createTokenInsance: node id: id storeString  start: 'context position' end: 'context position'.
+    self consumeWhitespace: node.
     
-    compiler addComment: 'Consume Whitespace:'.
-    compiler 
-          codeAssignParsedValueOf:[ self visit:node whitespace ]
-          to:#whatever.
-    compiler nl.
-    
-    compiler add: '^ true'.
+    compiler codeReturn: 'true'.
 
-    self fromTokenMode.
+    ^ self stopMethodForNode: node
 !
 
 visitTrimmingTokenNode: node
-    |  id  startVar endVar  |
-
-    "Tokens cannot be inlined, 
-        - their result is true/false
-        - the return value is always stored in currentTokenValue
-        - the current token type is always stored in currentTokenType
-    "
-    self assert: node isMarkedForInline not.
-    
-    startVar := compiler allocateTemporaryVariableNamed: 'start'.
-    endVar := compiler allocateTemporaryVariableNamed:  'end'.
-    
-    id := compiler idFor: node.
-    self toTokenMode.
-    
-    compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
-    compiler profileTokenRead: id.
-    
-    node allNodes size > 2 ifTrue: [ 
-        self addGuard: node ifTrue: nil  ifFalse: [ compiler addOnLine: '^ false' ].
-    ].
-
-    compiler codeAssign: 'context position + 1.' to: startVar.
-    compiler codeAssignParsedValueOf:[ self visit:node child ] to:#whatever.
-
-    compiler add: 'error ifTrue: [ ^ error := false ].'.
-
-        compiler codeAssign: 'context position.' to: endVar.
-    
-        compiler addComment: 'Consume Whitespace:'.
-        compiler 
-              codeAssignParsedValueOf:[ self visit:node whitespace ]
-              to:#whatever.
-        compiler nl.
-    
-    
-        compiler codeTranscriptShow: 'current token type: ', id storeString.
-        compiler codeAssign: id storeString, '.' to: 'currentTokenType'.
-        compiler codeAssign: node tokenClass asString, ' on: (context collection) 
-                                                                start: ', startVar, ' 
-                                                                stop: ', endVar, '
-                                                                value: nil.'
-                   to: 'currentTokenValue := ', self retvalVar.
-
-    compiler codeClearError.
-    compiler add: '^ true'.
-
-    self fromTokenMode.
+    ^ self visitToken: node
 ! !