compiler/PPCTokenCodeGenerator.st
changeset 524 f6f68d32de73
parent 515 b5316ef15274
child 525 751532c8f3db
equal deleted inserted replaced
515:b5316ef15274 524:f6f68d32de73
     1 "{ Package: 'stx:goodies/petitparser/compiler' }"
     1 "{ Package: 'stx:goodies/petitparser/compiler' }"
     2 
     2 
     3 "{ NameSpace: Smalltalk }"
     3 "{ NameSpace: Smalltalk }"
     4 
     4 
     5 PPCNodeVisitor subclass:#PPCTokenCodeGenerator
     5 PPCNodeVisitor subclass:#PPCTokenCodeGenerator
     6 	instanceVariableNames:'compiler scannerGenerator fsaCache'
     6 	instanceVariableNames:'codeGen'
     7 	classVariableNames:''
     7 	classVariableNames:''
     8 	poolDictionaries:''
     8 	poolDictionaries:''
     9 	category:'PetitCompiler-Visitors'
     9 	category:'PetitCompiler-Visitors-CodeGenerators'
    10 !
    10 !
    11 
    11 
    12 !PPCTokenCodeGenerator methodsFor:'accessing'!
    12 !PPCTokenCodeGenerator methodsFor:'accessing'!
    13 
    13 
    14 arguments: args
    14 arguments: args
    15     super arguments: args.
    15     super arguments: args.
    16     scannerGenerator arguments: args
    16     codeGen arguments: args.
    17 !
    17 !
    18 
    18 
    19 compiler
    19 clazz: aPPCClass
    20     ^ compiler
    20     codeGen clazz: aPPCClass
    21 !
       
    22 
       
    23 compiler: anObject
       
    24     compiler := anObject.
       
    25     
       
    26     scannerGenerator compiler idGen: compiler idGen.
       
    27 ! !
    21 ! !
    28 
    22 
    29 !PPCTokenCodeGenerator methodsFor:'code support'!
    23 !PPCTokenCodeGenerator methodsFor:'code support'!
    30 
    24 
    31 consumeWhitespace: node
    25 consumeWhitespace: node
    32     self assert: node isTokenNode.
    26     self assert: node isTokenNode.
    33 
    27 
    34     node isTrimmingTokenNode ifTrue: [ 
    28     node isTrimmingTokenNode ifTrue: [ 
    35         compiler code: 'self consumeWhitespace.'
    29         codeGen code: 'self scan_consumeWhitespace.'
    36     ]
    30     ]
    37 !
    31 !
    38 
    32 
    39 createTokenInsance: node id: idCode start: startVar end: endVar
    33 createTokenInstance: node id: idCode start: startVar end: endVar
    40     compiler codeTranscriptShow: 'current token type: ', idCode.
    34     codeGen startInline.
    41     compiler codeAssign: idCode, '.' to: 'currentTokenType'.
    35     codeGen codeTranscriptShow: 'current token type: ', idCode.
    42     compiler codeAssign: node tokenClass asString, ' on: (context collection) 
    36     codeGen codeAssign: node tokenClass asString, ' on: (context collection) 
    43                                                             start: ', startVar, ' 
    37                                                             start: ', startVar, ' 
    44                                                             stop: ', endVar, '
    38                                                             stop: ', endVar, '
    45                                                             value: nil.'
    39                                                             value: nil.'
    46                to: 'currentTokenValue'.
    40                to: self retvalVar.
       
    41     ^ codeGen stopInline
    47 !
    42 !
    48 
    43 
    49 scan: node start: startVar end: endVar
    44 scan: node start: startVar end: endVar
    50     node child hasName ifFalse: [ 
    45     node child hasName ifFalse: [ 
    51         node child name: node name
    46         node child name: node name
    52     ].
    47     ].
    53 
    48 
    54     compiler codeAssign: 'context position + 1.' to: startVar.
    49     codeGen codeAssign: 'context position + 1.' to: startVar.
    55     compiler add: ((self generateScan: node child) callOn: 'scanner').
    50     codeGen add: ((self generateScan: node child) callOn: 'scanner').
    56 !
    51 ! !
    57 
    52 
    58 unorderedChoiceFromFollowSet: followSet
    53 !PPCTokenCodeGenerator methodsFor:'compiling support'!
    59     | followFsas  |
    54 
    60     
    55 retvalVar
    61     ^ fsaCache at: followSet ifAbsentPut: [ 
    56     ^ codeGen currentReturnVariable
    62         followFsas := followSet collect: [ :followNode | 
    57 !
    63                 (followNode asFsa) 
    58 
    64                     name: (compiler idFor: followNode);
    59 startMethodForNode:node
    65                     retval: (compiler idFor: followNode); 
    60 
    66                     yourself
    61     node isMarkedForInline ifTrue:[ 
    67         ].
    62         codeGen startInline: (codeGen idFor: node).
    68         self unorderedChoiceFromFsas: followFsas.
    63         codeGen codeComment: 'BEGIN inlined code of ' , node printString.
       
    64         codeGen indent.
       
    65     ] ifFalse:[ 
       
    66         codeGen startMethod: (codeGen idFor: node).
       
    67         codeGen currentMethod category: 'generated - tokens'.
       
    68         codeGen codeComment: 'GENERATED by ' , node printString.
       
    69         codeGen allocateReturnVariable.
    69     ]
    70     ]
    70      
    71 !
    71 !
    72 
    72 
    73 stopMethodForNode:aPPCNode
    73 unorderedChoiceFromFsas: fsas
    74     ^ codeGen currentMethod isInline ifTrue:[ 
    74     | result startState |
    75                 codeGen dedent.
    75     result := PEGFsa new.
    76                 codeGen add: '"END inlined code of ' , aPPCNode printString , '"'.
    76     startState := PEGFsaState new.
    77                 codeGen stopInline.
    77     
       
    78     result addState: startState.
       
    79     result startState: startState.
       
    80 
       
    81     fsas do: [ :fsa | 
       
    82         result adopt: fsa.
       
    83         result addTransitionFrom: startState to: fsa startState.
       
    84     ].
       
    85 
       
    86     result determinizeStandard.
       
    87     ^ result
       
    88 ! !
       
    89 
       
    90 !PPCTokenCodeGenerator methodsFor:'compiling support'!
       
    91 
       
    92 compileScanner
       
    93     ^ scannerGenerator compileScannerClass
       
    94 !
       
    95 
       
    96 retvalVar
       
    97     ^ compiler currentReturnVariable
       
    98 !
       
    99 
       
   100 startMethodForNode:node
       
   101     node isMarkedForInline ifTrue:[ 
       
   102         compiler startInline: (compiler idFor: node).
       
   103         compiler codeComment: 'BEGIN inlined code of ' , node printString.
       
   104         compiler indent.
       
   105     ] ifFalse:[ 
    78     ] ifFalse:[ 
   106         compiler startMethod: (compiler idFor: node).
    79                 codeGen stopMethod
   107         compiler currentMethod category: 'generated - tokens'.
       
   108         compiler codeComment: 'GENERATED by ' , node printString.
       
   109         compiler allocateReturnVariable.
       
   110     ]
       
   111 !
       
   112 
       
   113 stopMethodForNode:aPPCNode
       
   114     ^ aPPCNode isMarkedForInline ifTrue:[ 
       
   115                 compiler dedent.
       
   116                 compiler add: '"END inlined code of ' , aPPCNode printString , '"'.
       
   117                 compiler stopInline.
       
   118     ] ifFalse:[ 
       
   119                 compiler stopMethod
       
   120     ].
    80     ].
   121 ! !
    81 ! !
   122 
    82 
   123 !PPCTokenCodeGenerator methodsFor:'initialization'!
    83 !PPCTokenCodeGenerator methodsFor:'initialization'!
   124 
    84 
   125 initialize
    85 initialize
   126     super initialize.
    86     super initialize.
   127     
    87     
   128     scannerGenerator := PPCScannerCodeGenerator new.
    88     codeGen := PPCCodeGen new.
   129     scannerGenerator arguments: arguments.
       
   130     
       
   131     "for the given set of nodes, remember the unordered choice fsa
       
   132         see `unorderedChoiceFromFollowSet:`
       
   133     "
       
   134     fsaCache := Dictionary new.
       
   135 ! !
       
   136 
       
   137 !PPCTokenCodeGenerator methodsFor:'scanning'!
       
   138 
       
   139 generateNextScan: node
       
   140     | epsilon followSet  anFsa |
       
   141     followSet := node followSetWithTokens.
       
   142     
       
   143     epsilon := followSet anySatisfy: [ :e | e acceptsEpsilon ].
       
   144     followSet := followSet reject: [ :e | e acceptsEpsilon ].
       
   145     epsilon ifTrue: [ followSet add: PPCEndOfFileNode instance ].
       
   146     
       
   147     anFsa := self unorderedChoiceFromFollowSet: followSet.
       
   148 
       
   149     anFsa name: 'nextToken_', (compiler idFor: node).
       
   150     node nextFsa: anFsa.
       
   151     ^ scannerGenerator generate: anFsa.
       
   152 !
       
   153 
       
   154 generateScan: node
       
   155     | anFsa |
       
   156     anFsa := node asFsa determinize.
       
   157     anFsa name: (compiler idFor: node).
       
   158     anFsa retval: (compiler idFor: node).
       
   159     
       
   160     ^ scannerGenerator generate: anFsa.
       
   161 ! !
    89 ! !
   162 
    90 
   163 !PPCTokenCodeGenerator methodsFor:'visiting'!
    91 !PPCTokenCodeGenerator methodsFor:'visiting'!
   164 
    92 
   165 visitToken: tokenNode
    93 visitToken: tokenNode
   166     |  id  startVar endVar  numberId |
    94     | scanId id |
       
    95     self assert: tokenNode isMarkedForInline not.
       
    96 
   167     self startMethodForNode: tokenNode.
    97     self startMethodForNode: tokenNode.
   168 
    98     
   169     "Tokens cannot be inlined, 
    99     id := codeGen idFor: tokenNode.
   170         - their result is true/false
   100     scanId := codeGen idFor: tokenNode fsa.
   171         - the return value is always stored in currentTokenValue
   101     
   172         - the current token type is always stored in currentTokenType
   102     codeGen code: 'match isNil ifFalse: [ ^ match == ', id storeString, '].'.
   173     "
   103     codeGen profileTokenRead: id.
   174     self assert: tokenNode isMarkedForInline not.
   104     
   175     
       
   176     startVar := compiler allocateTemporaryVariableNamed: 'start'.
       
   177     endVar := compiler allocateTemporaryVariableNamed:  'end'.
       
   178     
       
   179     id := compiler idFor: tokenNode.
       
   180     numberId := compiler numberIdFor: id.
       
   181     
       
   182     compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
       
   183     
       
   184 "	compiler codeComment: 'number for: ', id storeString, ' is: ', numberId storeString.
       
   185     compiler codeIf: 'scanner match: ', numberId storeString then: [ 
       
   186         compiler codeAssign: '(scanner resultPosition: ', numberId storeString, ').' to: endVar.
       
   187         self createTokenInsance: tokenNode 
       
   188                 id: id storeString 
       
   189                 start: '(context position + 1)' 
       
   190                 end: endVar.
       
   191         
       
   192         compiler code: 'context position: ', endVar, '.'.
       
   193         
       
   194         self consumeWhitespace: tokenNode.
       
   195         compiler codeReturn: 'true'.
       
   196     ].
       
   197     compiler codeIf: 'scanner backtracked not' then: [ 
       
   198         compiler codeReturn: 'false'.
       
   199     ].
       
   200     compiler codeComment: 'No match, no fail, scanner does not know about this...'.	
       
   201 "
       
   202     compiler profileTokenRead: id.
       
   203     
       
   204 "	self scan: tokenNode start: startVar end: endVar."
       
   205     "	compiler add: 'self assert: scanner isSingleMatch.'."
       
   206 "	compiler codeIf: 'scanner match ' then: ["
       
   207 
       
   208     tokenNode child hasName ifFalse: [ 
   105     tokenNode child hasName ifFalse: [ 
   209         tokenNode child name: tokenNode name
   106         tokenNode child name: tokenNode name
   210     ].
   107     ].
   211 
   108 
   212     compiler codeAssign: 'context position + 1.' to: startVar.
   109     codeGen codeIf: 'self ', scanId then: [ 
   213     compiler codeIf: [ compiler code: ((self generateScan: tokenNode child) callOn: 'scanner') ] then: [ 
       
   214         compiler add: 'context position: scanner resultPosition.'.
       
   215         compiler codeAssign: 'context position.' to: endVar.	
       
   216         self consumeWhitespace: tokenNode.
   110         self consumeWhitespace: tokenNode.
   217         self createTokenInsance: tokenNode id: id storeString start: startVar end: endVar.
   111         codeGen codeReturn: 'true'.
   218         compiler codeReturn: 'true'.
       
   219     ] else: [ 
   112     ] else: [ 
   220         compiler code: 'scanner backtrackDistinct.'.
   113         codeGen codeReturn: 'false'.
   221         compiler code: 'context position: ', startVar, ' - 1.'. 
       
   222         compiler codeReturn: 'false'.
       
   223     ].
   114     ].
   224     
   115     
   225     ^ self stopMethodForNode: tokenNode
   116     ^ self stopMethodForNode: tokenNode
   226 !
   117 !
   227 
   118 
   228 visitTokenConsumeNode: node
   119 visitTokenConsumeNode: node
   229     | id   nextScan |
   120     | id nextId |
   230     self startMethodForNode: node.
   121     self startMethodForNode: node.
   231     id := (compiler idFor: node child).
   122     id := (codeGen idFor: node child).
   232 
   123     nextId := (codeGen idFor: node nextFsa).
   233     compiler add: 'self ', id asString, ' ifTrue: ['.
   124     
   234         compiler indent.
   125     "this will inline scanner consumeXY in the parser"
   235 
   126     node markForInline.
   236         nextScan := self generateNextScan: node.
   127     
   237         
   128 
       
   129     codeGen codeIf: 'self ', id asString then: [
       
   130         codeGen codeAssign: [ 
       
   131             self createTokenInstance: node child
       
   132                 id: id asString
       
   133                 start: 'position + 1'
       
   134                 end: 'matchPosition'.
       
   135         ] to: self retvalVar.
       
   136 
       
   137 
       
   138         codeGen codeAssign: 'context position' to: 'position'; codeDot.
       
   139         codeGen codeAssign: 'position' to: 'matchPosition'; codeDot.
       
   140         codeGen codeAssign: 'nil' to: 'match'; codeDot.		
       
   141     
   238         node nextFsa hasDistinctRetvals ifTrue: [ 
   142         node nextFsa hasDistinctRetvals ifTrue: [ 
   239             compiler codeAssign: 'currentTokenValue.' to: self retvalVar.
   143             codeGen codeIf: [ codeGen codeOnLine: ('self ', nextId) ] then: [ 
   240         
       
   241             compiler add: (nextScan callOn: 'scanner'), '.'.
       
   242             compiler codeIf: 'scanner match' then: [ 
       
   243             compiler add: 'context position: scanner resultPosition.'.
       
   244                 self createTokenInsance: node child 
       
   245                         id: 'scanner result' 
       
   246                         start: 'scanner position + 1' 
       
   247                         end: 'scanner resultPosition'.
       
   248                 self consumeWhitespace: node child.
   144                 self consumeWhitespace: node child.
   249                 compiler codeReturn.
       
   250             ] else: [ 
   145             ] else: [ 
   251                 compiler codeComment: 'Looks like there is an error on its way...'.
   146                 self flag: 'imo should do something here and not wait...'.
   252                 compiler code: 'context position: scanner position.'.
   147                 codeGen codeComment: 'Looks like there is an error on its way...'.
   253                 compiler codeAssign: 'nil.' to: 'currentTokenType'.
       
   254                 compiler codeReturn.
       
   255             ]
   148             ]
   256 
       
   257         ] ifFalse: [ 
       
   258             compiler codeAssign: 'nil.' to: 'currentTokenType'.
       
   259             compiler codeReturn: 'currentTokenValue'.
       
   260         ].
   149         ].
   261         compiler dedent.
   150         codeGen codeReturn.
   262 
   151     
   263     "Token not found"
   152     "Token not found"
   264     compiler add: '] ifFalse: ['.
   153     ] else: [ 
   265         compiler indent.
   154 "		codeGen code: 'PPCScannerError new signalWith: ''', id asString, ' expected'''."
   266         compiler codeError: id asString, ' expected'.
   155         codeGen codeReturn: 'nil.'.
   267         compiler dedent.
   156     ].
   268     compiler add: '].'.
       
   269 
   157 
   270     ^ self stopMethodForNode: node
   158     ^ self stopMethodForNode: node
   271 !
   159 !
   272 
   160 
   273 visitTokenNode: node
   161 visitTokenNode: node
   274     ^ self visitToken: node
   162     ^ self visitToken: node
   275 !
   163 !
   276 
   164 
       
   165 visitTokenizingParserNode: node
       
   166     "produces token_XY methods"
       
   167     self visit: node tokens.
       
   168 
       
   169     "TODO JK: hack alert, I don't like WS handling, think of something smarter,
       
   170         perhaps allow for WS unique per token...
       
   171     "
       
   172     self visitWhitespace: node whitespace.
       
   173     
       
   174     "produces tokenConsume_XY methods"
       
   175     ^ self visit: node parser
       
   176 !
       
   177 
   277 visitTrimmingTokenCharacterNode: node
   178 visitTrimmingTokenCharacterNode: node
   278     |  id     |
   179     |  id  |
       
   180     self halt.
       
   181     self assert: node isMarkedForInline not.
       
   182 
   279     self startMethodForNode:node.
   183     self startMethodForNode:node.
   280 
   184     
   281     "Tokens cannot be inlined, 
   185     id := codeGen idFor: node.
   282         - their result is true/false
   186     
   283         - the return value is always stored in currentTokenValue
   187     codeGen add: 'match isNil ifFalse: [ ^ match == ', id storeString, '].'.
   284         - the current token type is always stored in currentTokenType
   188     codeGen profileTokenRead: id.
   285     "
   189 
   286     self assert: node isMarkedForInline not.
   190     codeGen add: '(context uncheckedPeek == ', node child character storeString, ') ifFalse: [ ^ false ].'.
   287     
   191     codeGen add: 'context next.'.
   288     id := compiler idFor: node.
   192 
   289     
   193     self createTokenInstance: node id: id storeString  start: 'context position' end: 'context position'.
   290     compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
       
   291     compiler profileTokenRead: id.
       
   292 
       
   293     compiler add: '(context peek == ', node child character storeString, ') ifFalse: [ ^ false ].'.
       
   294     compiler add: 'context next.'.
       
   295 
       
   296     self createTokenInsance: node id: id storeString  start: 'context position' end: 'context position'.
       
   297     self consumeWhitespace: node.
   194     self consumeWhitespace: node.
   298     
   195     
   299     compiler codeReturn: 'true'.
   196     codeGen codeReturn: 'true'.
   300 
   197 
   301     ^ self stopMethodForNode: node
   198     ^ self stopMethodForNode: node
   302 !
   199 !
   303 
   200 
   304 visitTrimmingTokenNode: node
   201 visitTrimmingTokenNode: node
   305     ^ self visitToken: node
   202     ^ self visitToken: node
   306 ! !
   203 !
   307 
   204 
       
   205 visitWhitespace: whitespaceNode
       
   206     self assert: whitespaceNode name = 'consumeWhitespace'.
       
   207 ! !
       
   208