TokenizedStream.st
changeset 173 f8c6732b927c
parent 172 99b850002359
child 174 64e81af0a50b
equal deleted inserted replaced
172:99b850002359 173:f8c6732b927c
     1 ReadStream subclass:#TokenizedStream
     1 ReadStream subclass:#TokenizedStream
     2 	instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
     2 	instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
     3 		tokenValue tokenRadix hereChar peekChar peekChar2
     3 		tokenValue tokenRadix hereChar peekChar peekChar2
     4 		beginCommentCharacter endCommentCharacter eolCommentCharacter
     4 		beginCommentCharacter endCommentCharacter eolCommentCharacter
     5 		eolCharacter outStream outCol actions types eolIsSignificant'
     5 		eolCharacter outStream outCol actions types eolIsSignificant
       
     6 		allowFloatNumbers'
     6 	classVariableNames:'DefaultActions DefaultTypes'
     7 	classVariableNames:'DefaultActions DefaultTypes'
     7 	poolDictionaries:''
     8 	poolDictionaries:''
     8 	category:'Streams'
     9 	category:'Streams'
     9 !
    10 !
    10 
    11 
   121                     ]
   122                     ]
   122                 ]
   123                 ]
   123             ]
   124             ]
   124         ].
   125         ].
   125 
   126 
   126 
   127     allowing float numbers (the default):
   127     scan /etc/services file:
   128 
       
   129         |s|
       
   130 
       
   131         s := TokenizedStream on:'1.23 4.56 7 8 9'.
       
   132         [s atEnd] whileFalse:[
       
   133             s next.
       
   134             Transcript showCr:(s tokenType displayString, ' value=' , s tokenValue printString).
       
   135         ].
       
   136 
       
   137 
       
   138     not allowing float numbers :
       
   139 
       
   140         |s|
       
   141 
       
   142         s := TokenizedStream on:'1.23 4.56 7 8 9'.
       
   143         s allowFloatNumbers:false.
       
   144 
       
   145         [s atEnd] whileFalse:[
       
   146             s next.
       
   147             Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString).
       
   148         ].
       
   149 
       
   150 
       
   151     no radix numbers (the default):
       
   152 
       
   153         |s|
       
   154 
       
   155         s := TokenizedStream on:'0x1234 16r1234'.
       
   156 
       
   157         [s atEnd] whileFalse:[
       
   158             s next.
       
   159             Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
       
   160         ].
       
   161 
       
   162 
       
   163     C-style radix numbers:
       
   164 
       
   165         |s|
       
   166 
       
   167         s := TokenizedStream on:'0x1234 16r1234'.
       
   168         s actionTable at:#digit put:[:s :char | s nextCNumber].
       
   169 
       
   170         [s atEnd] whileFalse:[
       
   171             s next.
       
   172             Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
       
   173         ].
       
   174 
       
   175 
       
   176     smalltalk-style radix numbers:
       
   177 
       
   178         |s|
       
   179 
       
   180         s := TokenizedStream on:'0x1234 16r1234'.
       
   181         s actionTable at:#digit put:[:s :char | s nextSmalltalkNumber].
       
   182 
       
   183         [s atEnd] whileFalse:[
       
   184             s next.
       
   185             Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
       
   186         ].
       
   187 
       
   188 
       
   189     scan the '/etc/services' file:
   128 
   190 
   129         |s t service port protocol|
   191         |s t service port protocol|
   130 
   192 
   131         s := TokenizedStream on:'/etc/services' asFilename readStream.
   193         s := TokenizedStream on:'/etc/services' asFilename readStream.
   132         s eolCommentCharacter:$#.
   194         s eolCommentCharacter:$#.
   154 ! !
   216 ! !
   155 
   217 
   156 !TokenizedStream class methodsFor:'initialization'!
   218 !TokenizedStream class methodsFor:'initialization'!
   157 
   219 
   158 initialize
   220 initialize
   159     |block|
   221     DefaultActions := IdentityDictionary new.
   160 
       
   161     DefaultActions := Array new:256.
       
   162     DefaultTypes := Array new:256.
   222     DefaultTypes := Array new:256.
   163 
   223 
   164     "kludge: action is nextColonOrAssign, but type is special"
   224     "kludge: action is nextColonOrAssign, but type is special"
   165     2 to:255 do:[:code |
   225     2 to:255 do:[:code |
   166         DefaultTypes at:code put:(Character value:code).
   226         DefaultTypes at:code put:(Character value:code).
   167     ].
   227     ].
   168 
   228 
   169     block := [:s :char | s nextInteger].
       
   170     ($0 asciiValue) to:($9 asciiValue) do:[:index |
   229     ($0 asciiValue) to:($9 asciiValue) do:[:index |
   171         DefaultTypes at:index put:#digit.
   230         DefaultTypes at:index put:#digit.
   172         DefaultActions at:index put:block
   231     ].
   173     ].
   232 
   174 
       
   175     block := [:s :char | s nextIdentifier].
       
   176     ($a asciiValue) to:($z asciiValue) do:[:index |
   233     ($a asciiValue) to:($z asciiValue) do:[:index |
   177         DefaultTypes at:index put:#letter.
   234         DefaultTypes at:index put:#letter.
   178         DefaultActions at:index put:block
       
   179     ].
   235     ].
   180     ($A asciiValue) to:($Z asciiValue) do:[:index |
   236     ($A asciiValue) to:($Z asciiValue) do:[:index |
   181         DefaultTypes at:index put:#letter.
   237         DefaultTypes at:index put:#letter.
   182         DefaultActions at:index put:block
   238     ].
   183     ].
   239 
       
   240     DefaultActions at:#letter put:[:s :char | s nextIdentifier].
       
   241     DefaultActions at:#digit  put:[:s :char | s nextNumber].
   184 
   242 
   185     "
   243     "
   186      TokenizedStream initialize
   244      TokenizedStream initialize
   187     "
   245     "
   188 ! !
   246 ! !
   197 
   255 
   198 actionTable
   256 actionTable
   199     ^ actions
   257     ^ actions
   200 
   258 
   201     "Created: 1.2.1996 / 17:42:00 / cg"
   259     "Created: 1.2.1996 / 17:42:00 / cg"
       
   260 !
       
   261 
       
   262 allowFloatNumbers:aBoolean
       
   263     "if false, floating numbers are not read; a period is returned as
       
   264      a separate token. If true (the default), floating point numbers are allowed."
       
   265 
       
   266     allowFloatNumbers := aBoolean
       
   267 
       
   268     "Modified: 1.2.1996 / 18:14:27 / cg"
       
   269     "Created: 1.2.1996 / 18:27:41 / cg"
   202 !
   270 !
   203 
   271 
   204 beginCommentCharacter:aCharacter
   272 beginCommentCharacter:aCharacter
   205     beginCommentCharacter := aCharacter
   273     beginCommentCharacter := aCharacter
   206 
   274 
   259     tokenLineNr := 1.
   327     tokenLineNr := 1.
   260     eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
   328     eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
   261     eolCharacter := Character cr.
   329     eolCharacter := Character cr.
   262     eolIsSignificant := false.
   330     eolIsSignificant := false.
   263 
   331 
   264     actions := DefaultActions.
   332     actions := DefaultActions shallowCopy.
   265     types := DefaultTypes.
   333     types := DefaultTypes shallowCopy.
   266 
   334     allowFloatNumbers := true.
   267     "Modified: 1.2.1996 / 17:36:56 / cg"
   335 
       
   336     "Modified: 1.2.1996 / 18:42:54 / cg"
   268 ! !
   337 ! !
   269 
   338 
   270 !TokenizedStream methodsFor:'private'!
   339 !TokenizedStream methodsFor:'private'!
   271 
   340 
   272 on:aStringOrStream
   341 on:aStringOrStream
   286 
   355 
   287 next
   356 next
   288     ^ self nextToken
   357     ^ self nextToken
   289 
   358 
   290     "Created: 1.2.1996 / 17:21:47 / cg"
   359     "Created: 1.2.1996 / 17:21:47 / cg"
       
   360 !
       
   361 
       
   362 nextCNumber
       
   363     |nextChar value s|
       
   364 
       
   365     tokenRadix := 10.
       
   366     source peek == $0 ifTrue:[
       
   367         source next.
       
   368         source peek == $x ifTrue:[
       
   369             source next.
       
   370             tokenRadix := 16.
       
   371         ] ifFalse:[
       
   372             tokenRadix := 8
       
   373         ]
       
   374     ].
       
   375 
       
   376     value := Integer readFrom:source radix:tokenRadix.
       
   377     nextChar := source peek.
       
   378 
       
   379     (allowFloatNumbers and:[tokenRadix == 10]) ifTrue:[
       
   380         (nextChar == $.) ifTrue:[
       
   381             nextChar := source nextPeek.
       
   382             (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
       
   383                 value := value asFloat + (self nextMantissa:tokenRadix).
       
   384                 nextChar := source peek
       
   385             ] ifFalse:[
       
   386                 nextChar == (Character cr) ifTrue:[
       
   387                     tokenLineNr := tokenLineNr + 1.
       
   388                 ].
       
   389                 peekChar := $.
       
   390             ]
       
   391         ].
       
   392         ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
       
   393             nextChar := source nextPeek.
       
   394             (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
       
   395                 s := 1.
       
   396                 (nextChar == $+) ifTrue:[
       
   397                     nextChar := source nextPeek
       
   398                 ] ifFalse:[
       
   399                     (nextChar == $-) ifTrue:[
       
   400                         nextChar := source nextPeek.
       
   401                         s := s negated
       
   402                     ]
       
   403                 ].
       
   404                 value := value asFloat
       
   405                          * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
       
   406             ]
       
   407         ].
       
   408     ].
       
   409     tokenValue := value.
       
   410     (value isMemberOf:Float) ifTrue:[
       
   411         tokenType := #Float
       
   412     ] ifFalse:[
       
   413         tokenType := #Integer
       
   414     ].
       
   415     ^ tokenType
       
   416 
       
   417     "Created: 1.2.1996 / 18:26:27 / cg"
   291 !
   418 !
   292 
   419 
   293 nextIdentifier
   420 nextIdentifier
   294     |nextChar string oldString 
   421     |nextChar string oldString 
   295      index "{ Class: SmallInteger }"
   422      index "{ Class: SmallInteger }"
   337 
   464 
   338     "Created: 1.2.1996 / 16:37:03 / cg"
   465     "Created: 1.2.1996 / 16:37:03 / cg"
   339     "Modified: 1.2.1996 / 16:37:28 / cg"
   466     "Modified: 1.2.1996 / 16:37:28 / cg"
   340 !
   467 !
   341 
   468 
       
   469 nextMantissa:radix
       
   470     |nextChar value factor|
       
   471 
       
   472     value := 0.
       
   473     factor := 1.0 / radix.
       
   474     nextChar := source peek.
       
   475     [(nextChar notNil and:[nextChar isDigitRadix:radix])] whileTrue:[
       
   476         value := value + (nextChar digitValue * factor).
       
   477         factor := factor / radix.
       
   478         nextChar := source nextPeek
       
   479     ].
       
   480     ^ value
       
   481 
       
   482     "Created: 1.2.1996 / 18:31:38 / cg"
       
   483 !
       
   484 
       
   485 nextNumber
       
   486     |nextChar value s|
       
   487 
       
   488     tokenRadix := 10.
       
   489     value := Integer readFrom:source radix:tokenRadix.
       
   490     nextChar := source peek.
       
   491     allowFloatNumbers ifTrue:[
       
   492         (nextChar == $.) ifTrue:[
       
   493             nextChar := source nextPeek.
       
   494             (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
       
   495                 value := value asFloat + (self nextMantissa:tokenRadix).
       
   496                 nextChar := source peek
       
   497             ] ifFalse:[
       
   498                 nextChar == (Character cr) ifTrue:[
       
   499                     tokenLineNr := tokenLineNr + 1.
       
   500                 ].
       
   501                 peekChar := $.
       
   502             ]
       
   503         ].
       
   504         ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
       
   505             nextChar := source nextPeek.
       
   506             (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
       
   507                 s := 1.
       
   508                 (nextChar == $+) ifTrue:[
       
   509                     nextChar := source nextPeek
       
   510                 ] ifFalse:[
       
   511                     (nextChar == $-) ifTrue:[
       
   512                         nextChar := source nextPeek.
       
   513                         s := s negated
       
   514                     ]
       
   515                 ].
       
   516                 value := value asFloat
       
   517                          * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
       
   518             ]
       
   519         ].
       
   520     ].
       
   521     tokenValue := value.
       
   522     (value isMemberOf:Float) ifTrue:[
       
   523         tokenType := #Float
       
   524     ] ifFalse:[
       
   525         tokenType := #Integer
       
   526     ].
       
   527     ^ tokenType
       
   528 
       
   529     "Modified: 1.2.1996 / 18:24:07 / cg"
       
   530     "Created: 1.2.1996 / 18:31:03 / cg"
       
   531 !
       
   532 
       
   533 nextSmalltalkNumber
       
   534     |nextChar value s|
       
   535 
       
   536     tokenRadix := 10.
       
   537     value := Integer readFrom:source radix:tokenRadix.
       
   538     nextChar := source peek.
       
   539     (nextChar == $r) ifTrue:[
       
   540         tokenRadix := value.
       
   541         source next.
       
   542         s := 1.
       
   543         source peek == $- ifTrue:[
       
   544             source next.
       
   545             s := -1
       
   546         ].
       
   547         value := Integer readFrom:source radix:tokenRadix.
       
   548         value := value * s.
       
   549         nextChar := source peek
       
   550     ].
       
   551     allowFloatNumbers ifTrue:[
       
   552         (nextChar == $.) ifTrue:[
       
   553             nextChar := source nextPeek.
       
   554             (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
       
   555                 value := value asFloat + (self nextMantissa:tokenRadix).
       
   556                 nextChar := source peek
       
   557             ] ifFalse:[
       
   558                 nextChar == (Character cr) ifTrue:[
       
   559                     tokenLineNr := tokenLineNr + 1.
       
   560                 ].
       
   561                 peekChar := $.
       
   562             ]
       
   563         ].
       
   564         ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
       
   565             nextChar := source nextPeek.
       
   566             (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
       
   567                 s := 1.
       
   568                 (nextChar == $+) ifTrue:[
       
   569                     nextChar := source nextPeek
       
   570                 ] ifFalse:[
       
   571                     (nextChar == $-) ifTrue:[
       
   572                         nextChar := source nextPeek.
       
   573                         s := s negated
       
   574                     ]
       
   575                 ].
       
   576                 value := value asFloat
       
   577                          * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
       
   578             ]
       
   579         ].
       
   580     ].
       
   581     tokenValue := value.
       
   582     (value isMemberOf:Float) ifTrue:[
       
   583         tokenType := #Float
       
   584     ] ifFalse:[
       
   585         tokenType := #Integer
       
   586     ].
       
   587     ^ tokenType
       
   588 
       
   589     "Created: 1.2.1996 / 18:19:05 / cg"
       
   590     "Modified: 1.2.1996 / 18:24:07 / cg"
       
   591 !
       
   592 
   342 nextString:separator
   593 nextString:separator
   343     |nextChar string pos
   594     |nextChar string pos
   344      index "{ Class: SmallInteger }"
   595      index "{ Class: SmallInteger }"
   345      len   "{ Class: SmallInteger }"
   596      len   "{ Class: SmallInteger }"
   346      inString|
   597      inString|
   388 
   639 
   389 nextToken
   640 nextToken
   390     "return the next token from the source-stream"
   641     "return the next token from the source-stream"
   391 
   642 
   392     |skipping actionBlock|
   643     |skipping actionBlock|
       
   644 
       
   645     tokenValue := tokenName := nil.
   393 
   646 
   394     peekChar notNil ifTrue:[
   647     peekChar notNil ifTrue:[
   395         hereChar := peekChar.
   648         hereChar := peekChar.
   396         peekChar := peekChar2.
   649         peekChar := peekChar2.
   397         peekChar2 := nil
   650         peekChar2 := nil
   445             ^ tokenType
   698             ^ tokenType
   446         ]
   699         ]
   447     ].
   700     ].
   448     tokenPosition := source position.
   701     tokenPosition := source position.
   449 
   702 
       
   703     types notNil ifTrue:[
       
   704         tokenType := types at:(hereChar asciiValue).
       
   705     ].
       
   706 
   450     actions notNil ifTrue:[
   707     actions notNil ifTrue:[
   451         actionBlock := actions at:(hereChar asciiValue).
   708         actionBlock := actions at:tokenType ifAbsent:nil.
   452         actionBlock notNil ifTrue:[
   709         actionBlock notNil ifTrue:[
   453             ^ actionBlock value:self value:hereChar
   710             ^ actionBlock value:self value:hereChar
   454         ]
   711         ]
   455     ].
   712     ].
   456 
   713 
   457     types notNil ifTrue:[
   714     source next.
   458         source next.
   715     tokenType isNil ifTrue:[
   459         tokenType := types at:(hereChar asciiValue).
   716         tokenType := #Error.
   460         tokenType notNil ifTrue:[
   717     ].
   461             ^ tokenType
   718     ^ tokenType
   462         ]
   719 
   463     ].
   720     "Modified: 1.2.1996 / 18:40:40 / cg"
   464 
       
   465     tokenType := #Error.
       
   466     ^ #Error
       
   467 
       
   468     "Modified: 1.2.1996 / 17:39:20 / cg"
       
   469 !
   721 !
   470 
   722 
   471 skipComment
   723 skipComment
   472     source next.
   724     source next.
   473     hereChar := source peek.
   725     hereChar := source peek.
   521 ! !
   773 ! !
   522 
   774 
   523 !TokenizedStream class methodsFor:'documentation'!
   775 !TokenizedStream class methodsFor:'documentation'!
   524 
   776 
   525 version
   777 version
   526     ^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.2 1996-02-01 17:14:57 cg Exp $'
   778     ^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.3 1996-02-01 17:44:48 cg Exp $'
   527 ! !
   779 ! !
   528 TokenizedStream initialize!
   780 TokenizedStream initialize!