compiler/PPCTokenCodeGenerator.st
changeset 525 751532c8f3db
parent 518 a6d8b93441b0
parent 524 f6f68d32de73
child 529 439c4057517f
equal deleted inserted replaced
523:09afcf28ed60 525:751532c8f3db
     1 "{ Package: 'stx:goodies/petitparser/compiler' }"
     1 "{ Package: 'stx:goodies/petitparser/compiler' }"
     2 
     2 
     3 "{ NameSpace: Smalltalk }"
     3 "{ NameSpace: Smalltalk }"
     4 
     4 
     5 PPCNodeVisitor subclass:#PPCTokenCodeGenerator
     5 PPCNodeVisitor subclass:#PPCTokenCodeGenerator
     6 	instanceVariableNames:'compiler scannerGenerator fsaCache'
     6 	instanceVariableNames:'codeGen'
     7 	classVariableNames:''
     7 	classVariableNames:''
     8 	poolDictionaries:''
     8 	poolDictionaries:''
     9 	category:'PetitCompiler-Visitors'
     9 	category:'PetitCompiler-Visitors-CodeGenerators'
    10 !
    10 !
    11 
       
    12 
    11 
    13 !PPCTokenCodeGenerator methodsFor:'accessing'!
    12 !PPCTokenCodeGenerator methodsFor:'accessing'!
    14 
    13 
    15 arguments: args
    14 arguments: args
    16     super arguments: args.
    15     super arguments: args.
    17     scannerGenerator arguments: args
    16     codeGen arguments: args.
    18 !
    17 !
    19 
    18 
    20 compiler
    19 clazz: aPPCClass
    21     ^ compiler
    20     codeGen clazz: aPPCClass
    22 !
       
    23 
       
    24 compiler: anObject
       
    25     compiler := anObject.
       
    26     
       
    27     scannerGenerator compiler idGen: compiler idGen.
       
    28 ! !
    21 ! !
    29 
    22 
    30 !PPCTokenCodeGenerator methodsFor:'code support'!
    23 !PPCTokenCodeGenerator methodsFor:'code support'!
    31 
    24 
    32 consumeWhitespace: node
    25 consumeWhitespace: node
    33     self assert: node isTokenNode.
    26     self assert: node isTokenNode.
    34 
    27 
    35     node isTrimmingTokenNode ifTrue: [ 
    28     node isTrimmingTokenNode ifTrue: [ 
    36         compiler code: 'self consumeWhitespace.'
    29         codeGen code: 'self scan_consumeWhitespace.'
    37     ]
    30     ]
    38 !
    31 !
    39 
    32 
    40 createTokenInsance: node id: idCode start: startVar end: endVar
    33 createTokenInstance: node id: idCode start: startVar end: endVar
    41     compiler codeTranscriptShow: 'current token type: ', idCode.
    34     codeGen startInline.
    42     compiler codeAssign: idCode, '.' to: 'currentTokenType'.
    35     codeGen codeTranscriptShow: 'current token type: ', idCode.
    43     compiler codeAssign: node tokenClass asString, ' on: (context collection) 
    36     codeGen codeAssign: node tokenClass asString, ' on: (context collection) 
    44                                                             start: ', startVar, ' 
    37                                                             start: ', startVar, ' 
    45                                                             stop: ', endVar, '
    38                                                             stop: ', endVar, '
    46                                                             value: nil.'
    39                                                             value: nil.'
    47                to: 'currentTokenValue'.
    40                to: self retvalVar.
       
    41     ^ codeGen stopInline
    48 !
    42 !
    49 
    43 
    50 scan: node start: startVar end: endVar
    44 scan: node start: startVar end: endVar
    51     node child hasName ifFalse: [ 
    45     node child hasName ifFalse: [ 
    52         node child name: node name
    46         node child name: node name
    53     ].
    47     ].
    54 
    48 
    55     compiler codeAssign: 'context position + 1.' to: startVar.
    49     codeGen codeAssign: 'context position + 1.' to: startVar.
    56     compiler add: ((self generateScan: node child) callOn: 'scanner').
    50     codeGen add: ((self generateScan: node child) callOn: 'scanner').
    57 !
    51 ! !
    58 
    52 
    59 unorderedChoiceFromFollowSet: followSet
    53 !PPCTokenCodeGenerator methodsFor:'compiling support'!
    60     | followFsas  |
    54 
    61     
    55 retvalVar
    62     ^ fsaCache at: followSet ifAbsentPut: [ 
    56     ^ codeGen currentReturnVariable
    63         followFsas := followSet collect: [ :followNode | 
    57 !
    64                 (followNode asFsa) 
    58 
    65                     name: (compiler idFor: followNode);
    59 startMethodForNode:node
    66                     retval: (compiler idFor: followNode); 
    60 
    67                     yourself
    61     node isMarkedForInline ifTrue:[ 
    68         ].
    62         codeGen startInline: (codeGen idFor: node).
    69         self unorderedChoiceFromFsas: followFsas.
    63         codeGen codeComment: 'BEGIN inlined code of ' , node printString.
       
    64         codeGen indent.
       
    65     ] ifFalse:[ 
       
    66         codeGen startMethod: (codeGen idFor: node).
       
    67         codeGen currentMethod category: 'generated - tokens'.
       
    68         codeGen codeComment: 'GENERATED by ' , node printString.
       
    69         codeGen allocateReturnVariable.
    70     ]
    70     ]
    71      
    71 !
    72 !
    72 
    73 
    73 stopMethodForNode:aPPCNode
    74 unorderedChoiceFromFsas: fsas
    74     ^ codeGen currentMethod isInline ifTrue:[ 
    75     | result startState |
    75                 codeGen dedent.
    76     result := PEGFsa new.
    76                 codeGen add: '"END inlined code of ' , aPPCNode printString , '"'.
    77     startState := PEGFsaState new.
    77                 codeGen stopInline.
    78     
       
    79     result addState: startState.
       
    80     result startState: startState.
       
    81 
       
    82     fsas do: [ :fsa | 
       
    83         result adopt: fsa.
       
    84         result addTransitionFrom: startState to: fsa startState.
       
    85     ].
       
    86 
       
    87     result determinizeStandard.
       
    88     ^ result
       
    89 ! !
       
    90 
       
    91 !PPCTokenCodeGenerator methodsFor:'compiling support'!
       
    92 
       
    93 compileScanner
       
    94     ^ scannerGenerator compileScannerClass
       
    95 !
       
    96 
       
    97 retvalVar
       
    98     ^ compiler currentReturnVariable
       
    99 !
       
   100 
       
   101 startMethodForNode:node
       
   102     node isMarkedForInline ifTrue:[ 
       
   103         compiler startInline: (compiler idFor: node).
       
   104         compiler codeComment: 'BEGIN inlined code of ' , node printString.
       
   105         compiler indent.
       
   106     ] ifFalse:[ 
    78     ] ifFalse:[ 
   107         compiler startMethod: (compiler idFor: node).
    79                 codeGen stopMethod
   108         compiler currentMethod category: 'generated - tokens'.
       
   109         compiler codeComment: 'GENERATED by ' , node printString.
       
   110         compiler allocateReturnVariable.
       
   111     ]
       
   112 !
       
   113 
       
   114 stopMethodForNode:aPPCNode
       
   115     ^ aPPCNode isMarkedForInline ifTrue:[ 
       
   116                 compiler dedent.
       
   117                 compiler add: '"END inlined code of ' , aPPCNode printString , '"'.
       
   118                 compiler stopInline.
       
   119     ] ifFalse:[ 
       
   120                 compiler stopMethod
       
   121     ].
    80     ].
   122 ! !
    81 ! !
   123 
    82 
   124 !PPCTokenCodeGenerator methodsFor:'initialization'!
    83 !PPCTokenCodeGenerator methodsFor:'initialization'!
   125 
    84 
   126 initialize
    85 initialize
   127     super initialize.
    86     super initialize.
   128     
    87     
   129     scannerGenerator := PPCScannerCodeGenerator new.
    88     codeGen := PPCCodeGen new.
   130     scannerGenerator arguments: arguments.
       
   131     
       
   132     "for the given set of nodes, remember the unordered choice fsa
       
   133         see `unorderedChoiceFromFollowSet:`
       
   134     "
       
   135     fsaCache := Dictionary new.
       
   136 ! !
       
   137 
       
   138 !PPCTokenCodeGenerator methodsFor:'scanning'!
       
   139 
       
   140 generateNextScan: node
       
   141     | epsilon followSet  anFsa |
       
   142     followSet := node followSetWithTokens.
       
   143     
       
   144     epsilon := followSet anySatisfy: [ :e | e acceptsEpsilon ].
       
   145     followSet := followSet reject: [ :e | e acceptsEpsilon ].
       
   146     epsilon ifTrue: [ followSet add: PPCEndOfFileNode instance ].
       
   147     
       
   148     anFsa := self unorderedChoiceFromFollowSet: followSet.
       
   149 
       
   150     anFsa name: 'nextToken_', (compiler idFor: node).
       
   151     node nextFsa: anFsa.
       
   152     ^ scannerGenerator generate: anFsa.
       
   153 !
       
   154 
       
   155 generateScan: node
       
   156     | anFsa |
       
   157     anFsa := node asFsa determinize.
       
   158     anFsa name: (compiler idFor: node).
       
   159     anFsa retval: (compiler idFor: node).
       
   160     
       
   161     ^ scannerGenerator generate: anFsa.
       
   162 ! !
    89 ! !
   163 
    90 
   164 !PPCTokenCodeGenerator methodsFor:'visiting'!
    91 !PPCTokenCodeGenerator methodsFor:'visiting'!
   165 
    92 
   166 visitToken: tokenNode
    93 visitToken: tokenNode
   167     |  id  startVar endVar  numberId |
    94     | scanId id |
       
    95     self assert: tokenNode isMarkedForInline not.
       
    96 
   168     self startMethodForNode: tokenNode.
    97     self startMethodForNode: tokenNode.
   169 
    98     
   170     "Tokens cannot be inlined, 
    99     id := codeGen idFor: tokenNode.
   171         - their result is true/false
   100     scanId := codeGen idFor: tokenNode fsa.
   172         - the return value is always stored in currentTokenValue
   101     
   173         - the current token type is always stored in currentTokenType
   102     codeGen code: 'match isNil ifFalse: [ ^ match == ', id storeString, '].'.
   174     "
   103     codeGen profileTokenRead: id.
   175     self assert: tokenNode isMarkedForInline not.
   104     
   176     
       
   177     startVar := compiler allocateTemporaryVariableNamed: 'start'.
       
   178     endVar := compiler allocateTemporaryVariableNamed:  'end'.
       
   179     
       
   180     id := compiler idFor: tokenNode.
       
   181     numberId := compiler numberIdFor: id.
       
   182     
       
   183     compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
       
   184     
       
   185 "	compiler codeComment: 'number for: ', id storeString, ' is: ', numberId storeString.
       
   186     compiler codeIf: 'scanner match: ', numberId storeString then: [ 
       
   187         compiler codeAssign: '(scanner resultPosition: ', numberId storeString, ').' to: endVar.
       
   188         self createTokenInsance: tokenNode 
       
   189                 id: id storeString 
       
   190                 start: '(context position + 1)' 
       
   191                 end: endVar.
       
   192         
       
   193         compiler code: 'context position: ', endVar, '.'.
       
   194         
       
   195         self consumeWhitespace: tokenNode.
       
   196         compiler codeReturn: 'true'.
       
   197     ].
       
   198     compiler codeIf: 'scanner backtracked not' then: [ 
       
   199         compiler codeReturn: 'false'.
       
   200     ].
       
   201     compiler codeComment: 'No match, no fail, scanner does not know about this...'.	
       
   202 "
       
   203     compiler profileTokenRead: id.
       
   204     
       
   205 "	self scan: tokenNode start: startVar end: endVar."
       
   206     "	compiler add: 'self assert: scanner isSingleMatch.'."
       
   207 "	compiler codeIf: 'scanner match ' then: ["
       
   208 
       
   209     tokenNode child hasName ifFalse: [ 
   105     tokenNode child hasName ifFalse: [ 
   210         tokenNode child name: tokenNode name
   106         tokenNode child name: tokenNode name
   211     ].
   107     ].
   212 
   108 
   213     compiler codeAssign: 'context position + 1.' to: startVar.
   109     codeGen codeIf: 'self ', scanId then: [ 
   214     compiler codeIf: [ compiler code: ((self generateScan: tokenNode child) callOn: 'scanner') ] then: [ 
       
   215         compiler add: 'context position: scanner resultPosition.'.
       
   216         compiler codeAssign: 'context position.' to: endVar.	
       
   217         self consumeWhitespace: tokenNode.
   110         self consumeWhitespace: tokenNode.
   218         self createTokenInsance: tokenNode id: id storeString start: startVar end: endVar.
   111         codeGen codeReturn: 'true'.
   219         compiler codeReturn: 'true'.
       
   220     ] else: [ 
   112     ] else: [ 
   221         compiler code: 'scanner backtrackDistinct.'.
   113         codeGen codeReturn: 'false'.
   222         compiler code: 'context position: ', startVar, ' - 1.'. 
       
   223         compiler codeReturn: 'false'.
       
   224     ].
   114     ].
   225     
   115     
   226     ^ self stopMethodForNode: tokenNode
   116     ^ self stopMethodForNode: tokenNode
   227 !
   117 !
   228 
   118 
   229 visitTokenConsumeNode: node
   119 visitTokenConsumeNode: node
   230     | id   nextScan |
   120     | id nextId |
   231     self startMethodForNode: node.
   121     self startMethodForNode: node.
   232     id := (compiler idFor: node child).
   122     id := (codeGen idFor: node child).
   233 
   123     nextId := (codeGen idFor: node nextFsa).
   234     compiler add: 'self ', id asString, ' ifTrue: ['.
   124     
   235         compiler indent.
   125     "this will inline scanner consumeXY in the parser"
   236 
   126     node markForInline.
   237         nextScan := self generateNextScan: node.
   127     
   238         
   128 
       
   129     codeGen codeIf: 'self ', id asString then: [
       
   130         codeGen codeAssign: [ 
       
   131             self createTokenInstance: node child
       
   132                 id: id asString
       
   133                 start: 'position + 1'
       
   134                 end: 'matchPosition'.
       
   135         ] to: self retvalVar.
       
   136 
       
   137 
       
   138         codeGen codeAssign: 'context position' to: 'position'; codeDot.
       
   139         codeGen codeAssign: 'position' to: 'matchPosition'; codeDot.
       
   140         codeGen codeAssign: 'nil' to: 'match'; codeDot.		
       
   141     
   239         node nextFsa hasDistinctRetvals ifTrue: [ 
   142         node nextFsa hasDistinctRetvals ifTrue: [ 
   240             compiler codeAssign: 'currentTokenValue.' to: self retvalVar.
   143             codeGen codeIf: [ codeGen codeOnLine: ('self ', nextId) ] then: [ 
   241         
       
   242             compiler add: (nextScan callOn: 'scanner'), '.'.
       
   243             compiler codeIf: 'scanner match' then: [ 
       
   244             compiler add: 'context position: scanner resultPosition.'.
       
   245                 self createTokenInsance: node child 
       
   246                         id: 'scanner result' 
       
   247                         start: 'scanner position + 1' 
       
   248                         end: 'scanner resultPosition'.
       
   249                 self consumeWhitespace: node child.
   144                 self consumeWhitespace: node child.
   250                 compiler codeReturn.
       
   251             ] else: [ 
   145             ] else: [ 
   252                 compiler codeComment: 'Looks like there is an error on its way...'.
   146                 self flag: 'imo should do something here and not wait...'.
   253                 compiler code: 'context position: scanner position.'.
   147                 codeGen codeComment: 'Looks like there is an error on its way...'.
   254                 compiler codeAssign: 'nil.' to: 'currentTokenType'.
       
   255                 compiler codeReturn.
       
   256             ]
   148             ]
   257 
       
   258         ] ifFalse: [ 
       
   259             compiler codeAssign: 'nil.' to: 'currentTokenType'.
       
   260             compiler codeReturn: 'currentTokenValue'.
       
   261         ].
   149         ].
   262         compiler dedent.
   150         codeGen codeReturn.
   263 
   151     
   264     "Token not found"
   152     "Token not found"
   265     compiler add: '] ifFalse: ['.
   153     ] else: [ 
   266         compiler indent.
   154 "		codeGen code: 'PPCScannerError new signalWith: ''', id asString, ' expected'''."
   267         compiler codeError: id asString, ' expected'.
   155         codeGen codeReturn: 'nil.'.
   268         compiler dedent.
   156     ].
   269     compiler add: '].'.
       
   270 
   157 
   271     ^ self stopMethodForNode: node
   158     ^ self stopMethodForNode: node
   272 !
   159 !
   273 
   160 
   274 visitTokenNode: node
   161 visitTokenNode: node
   275     ^ self visitToken: node
   162     ^ self visitToken: node
   276 !
   163 !
   277 
   164 
       
   165 visitTokenizingParserNode: node
       
   166     "produces token_XY methods"
       
   167     self visit: node tokens.
       
   168 
       
   169     "TODO JK: hack alert, I don't like WS handling, think of something smarter,
       
   170         perhaps allow for WS unique per token...
       
   171     "
       
   172     self visitWhitespace: node whitespace.
       
   173     
       
   174     "produces tokenConsume_XY methods"
       
   175     ^ self visit: node parser
       
   176 !
       
   177 
   278 visitTrimmingTokenCharacterNode: node
   178 visitTrimmingTokenCharacterNode: node
   279     |  id     |
   179     |  id  |
       
   180     self halt.
       
   181     self assert: node isMarkedForInline not.
       
   182 
   280     self startMethodForNode:node.
   183     self startMethodForNode:node.
   281 
   184     
   282     "Tokens cannot be inlined, 
   185     id := codeGen idFor: node.
   283         - their result is true/false
   186     
   284         - the return value is always stored in currentTokenValue
   187     codeGen add: 'match isNil ifFalse: [ ^ match == ', id storeString, '].'.
   285         - the current token type is always stored in currentTokenType
   188     codeGen profileTokenRead: id.
   286     "
   189 
   287     self assert: node isMarkedForInline not.
   190     codeGen add: '(context uncheckedPeek == ', node child character storeString, ') ifFalse: [ ^ false ].'.
   288     
   191     codeGen add: 'context next.'.
   289     id := compiler idFor: node.
   192 
   290     
   193     self createTokenInstance: node id: id storeString  start: 'context position' end: 'context position'.
   291     compiler add: 'currentTokenType isNil ifFalse: [ ^ currentTokenType == ', id storeString, '].'.
       
   292     compiler profileTokenRead: id.
       
   293 
       
   294     compiler add: '(context peek == ', node child character storeString, ') ifFalse: [ ^ false ].'.
       
   295     compiler add: 'context next.'.
       
   296 
       
   297     self createTokenInsance: node id: id storeString  start: 'context position' end: 'context position'.
       
   298     self consumeWhitespace: node.
   194     self consumeWhitespace: node.
   299     
   195     
   300     compiler codeReturn: 'true'.
   196     codeGen codeReturn: 'true'.
   301 
   197 
   302     ^ self stopMethodForNode: node
   198     ^ self stopMethodForNode: node
   303 !
   199 !
   304 
   200 
   305 visitTrimmingTokenNode: node
   201 visitTrimmingTokenNode: node
   306     ^ self visitToken: node
   202     ^ self visitToken: node
   307 ! !
   203 !
   308 
   204 
   309 !PPCTokenCodeGenerator class methodsFor:'documentation'!
   205 visitWhitespace: whitespaceNode
   310 
   206     self assert: whitespaceNode name = 'consumeWhitespace'.
   311 version_HG
   207 ! !
   312 
   208 
   313     ^ '$Changeset: <not expanded> $'
       
   314 ! !
       
   315