ReadStream subclass:#TokenizedStream
instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
tokenValue tokenRadix hereChar peekChar peekChar2
beginCommentCharacter endCommentCharacter eolCommentCharacter
eolCharacter outStream outCol actions types eolIsSignificant
allowFloatNumbers numbersAreSigned'
classVariableNames:'DefaultActions DefaultTypes'
poolDictionaries:''
category:'Streams'
!
!TokenizedStream class methodsFor:'documentation'!
For:'documentation'!
examples
"
simple example:
|s|
s := TokenizedStream on:'hello world, how much is 3 + 2'.
[s atEnd] whileFalse:[
Transcript showCr:(s next).
].
simple example2:
|s token|
s := TokenizedStream on:'foo bar baz 3 + 2'.
[s atEnd] whileFalse:[
token := s next.
token == #Identifier ifTrue:[
Transcript showCr:(token , ' name=' , s tokenName).
] ifFalse:[
token == #Integer ifTrue:[
Transcript showCr:(token , ' value=' , s tokenValue printString).
] ifFalse:[
Transcript showCr:token.
]
]
].
reading expressions:
|s num1 num2|
s := TokenizedStream on:'
3 + 2
4 + 6
1 + 2
'.
[s atEnd] whileFalse:[
s next == #Integer ifTrue:[
num1 := s tokenValue.
s next == $+ ifTrue:[
s next == #Integer ifTrue:[
num2 := s tokenValue.
Transcript showCr:num1 printString
, ' + '
, num2 printString
, ' => '
, (num1 + num2) printString.
]
]
]
].
with eol-comments:
|s num1 num2|
s := TokenizedStream on:'
3 + 2
; this is a comment
4 + 6
1 + 2
'.
s eolCommentCharacter:$;.
[s atEnd] whileFalse:[
s next == #Integer ifTrue:[
num1 := s tokenValue.
s next == $+ ifTrue:[
s next == #Integer ifTrue:[
num2 := s tokenValue.
Transcript showCr:num1 printString
, ' + '
, num2 printString
, ' => '
, (num1 + num2) printString.
]
]
]
].
allowing float & negative numbers (the default):
|s|
s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0'.
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString, ' value=' , s tokenValue printString).
].
not allowing float numbers :
|s|
s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0 '.
s allowFloatNumbers:false.
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString).
].
not allowing negative numbers :
|s|
s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0'.
s numbersAreSigned:false.
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString).
].
no radix numbers (the default):
|s|
s := TokenizedStream on:'0x1234 16r1234'.
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
].
C-style radix numbers:
|s|
s := TokenizedStream on:'0x1234 16r1234'.
s actionTable at:#digit put:[:s :char | s nextCNumber].
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
].
smalltalk-style radix numbers:
|s|
s := TokenizedStream on:'0x1234 16r1234'.
s actionTable at:#digit put:[:s :char | s nextSmalltalkNumber].
[s atEnd] whileFalse:[
s next.
Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
].
scan the '/etc/services' file:
|s t service port protocol|
s := TokenizedStream on:'/etc/services' asFilename readStream.
s eolCommentCharacter:$#.
s typeTable at:($- asciiValue) put:#letter.
[s atEnd] whileFalse:[
t := s next.
t == #Identifier ifTrue:[
service := s tokenName.
t := s next.
t == #Integer ifTrue:[
port := s tokenValue.
s next == $/ ifTrue:[
t := s next.
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
! !
!TokenizedStream class methodsFor:'initialization'!
initialize
DefaultActions := IdentityDictionary new.
DefaultTypes := Array new:256.
"kludge: action is nextColonOrAssign, but type is special"
2 to:255 do:[:code |
DefaultTypes at:code put:(Character value:code).
].
($0 asciiValue) to:($9 asciiValue) do:[:index |
DefaultTypes at:index put:#digit.
].
($a asciiValue) to:($z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
($A asciiValue) to:($Z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
DefaultActions at:#letter put:[:s :char | s nextIdentifier].
DefaultActions at:#digit put:[:s :char | s nextNumber].
DefaultActions at:$- put:[:s :char | s checkForNumberAfterSign].
"
TokenizedStream initialize
"
! !
!TokenizedStream class methodsFor:'instance creation'!
s typeTable at:($- asciiValue) put:#letter.
[s atEnd] whileFalse:[
t := s next.
t == #Identifier ifTrue:[
service := s tokenName.
t := s next.
t == #Integer ifTrue:[
port := s tokenValue.
s next == $/ ifTrue:[
t := s next.
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
! !
!TokenizedStream methodsFor:'accessing'!
t := s next.
t == #Identifier ifTrue:[
service := s tokenName.
t := s next.
t == #Integer ifTrue:[
port := s tokenValue.
s next == $/ ifTrue:[
t := s next.
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
!
allowFloatNumbers:aBoolean
"if false, floating numbers are not read; a period is returned as
a separate token. If true (the default), floating point numbers are allowed."
allowFloatNumbers := aBoolean
"Modified: 1.2.1996 / 18:14:27 / cg"
"Created: 1.2.1996 / 18:27:41 / cg"
!
vice := s tokenName.
t := s next.
t == #Integer ifTrue:[
port := s tokenValue.
s next == $/ ifTrue:[
t := s next.
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
!
s tokenValue.
s next == $/ ifTrue:[
t := s next.
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
!
t == #Identifier ifTrue:[
protocol := s tokenName.
Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
!
anscript showCr:(service , ' is ' , protocol , ' port=' , port printString).
]
]
]
].
s skipToEol
]
"
!
numbersAreSigned:aBoolean
"if false, minus signs preceeding numbers are ignored.
if true (the default) they are recognized"
numbersAreSigned := aBoolean
"Modified: 1.2.1996 / 18:14:27 / cg"
"Created: 1.2.1996 / 18:56:34 / cg"
!
'!
ltTypes := Array new:256.
"kludge: action is nextColonOrAssign, but type is special"
2 to:255 do:[:code |
DefaultTypes at:code put:(Character value:code).
].
($0 asciiValue) to:($9 asciiValue) do:[:index |
DefaultTypes at:index put:#digit.
].
($a asciiValue) to:($z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
($A asciiValue) to:($Z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
DefaultActions at:#letter put:[:s :char | s nextIdentifier].
DefaultActions at:#digit put:[:s :char | s nextNumber].
"
TokenizedStream initialize
"
!
ut type is special"
2 to:255 do:[:code |
DefaultTypes at:code put:(Character value:code).
].
($0 asciiValue) to:($9 asciiValue) do:[:index |
DefaultTypes at:index put:#digit.
].
($a asciiValue) to:($z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
($A asciiValue) to:($Z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
DefaultActions at:#letter put:[:s :char | s nextIdentifier].
DefaultActions at:#digit put:[:s :char | s nextNumber].
"
TokenizedStream initialize
"
!
e put:(Character value:code).
].
($0 asciiValue) to:($9 asciiValue) do:[:index |
DefaultTypes at:index put:#digit.
].
($a asciiValue) to:($z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
($A asciiValue) to:($Z asciiValue) do:[:index |
DefaultTypes at:index put:#letter.
].
DefaultActions at:#letter put:[:s :char | s nextIdentifier].
DefaultActions at:#digit put:[:s :char | s nextNumber].
"
TokenizedStream initialize
"
! !
!TokenizedStream methodsFor:'initialization'!
initialize
tokenLineNr := 1.
eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
eolCharacter := Character cr.
eolIsSignificant := false.
actions := DefaultActions shallowCopy.
types := DefaultTypes shallowCopy.
allowFloatNumbers := true.
numbersAreSigned := true.
"Modified: 1.2.1996 / 19:00:50 / cg"
! !
!TokenizedStream methodsFor:'private'!
put:[:s :char | s nextNumber].
"
TokenizedStream initialize
"
! !
!TokenizedStream methodsFor:'reading'!
checkForNumberAfterSign
|next|
source next.
numbersAreSigned ifTrue:[
next := source peek .
(next notNil and:[(types at:next asciiValue) == #digit]) ifTrue:[
(actions at:#digit) value:self value:next.
tokenValue := tokenValue negated.
]
]
"Modified: 1.2.1996 / 19:02:46 / cg"
!
rs:aBoolean
"if false, floating numbers are not read; a period is returned as
a separate token. If true (the default), floating point numbers are allowed."
allowFloatNumbers := aBoolean
"Modified: 1.2.1996 / 18:14:27 / cg"
"Created: 1.2.1996 / 18:27:41 / cg"
!
nextCNumber
|nextChar value s|
tokenRadix := 10.
source peek == $0 ifTrue:[
source next.
source peek == $x ifTrue:[
source next.
tokenRadix := 16.
] ifFalse:[
tokenRadix := 8
]
].
value := Integer readFrom:source radix:tokenRadix.
nextChar := source peek.
(allowFloatNumbers and:[tokenRadix == 10]) ifTrue:[
(nextChar == $.) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
value := value asFloat + (self nextMantissa:tokenRadix).
nextChar := source peek
] ifFalse:[
nextChar == (Character cr) ifTrue:[
tokenLineNr := tokenLineNr + 1.
].
peekChar := $.
]
].
((nextChar == $e) or:[nextChar == $E]) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
s := 1.
(nextChar == $+) ifTrue:[
nextChar := source nextPeek
] ifFalse:[
(nextChar == $-) ifTrue:[
nextChar := source nextPeek.
s := s negated
]
].
value := value asFloat
* (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
]
].
].
tokenValue := value.
(value isMemberOf:Float) ifTrue:[
tokenType := #Float
] ifFalse:[
tokenType := #Integer
].
^ tokenType
"Created: 1.2.1996 / 18:26:27 / cg"
!
returned as
a separate token. If true (the default), floating point numbers are allowed."
allowFloatNumbers := aBoolean
"Modified: 1.2.1996 / 18:14:27 / cg"
"Created: 1.2.1996 / 18:27:41 / cg"
!
peTable
^ types
"Created: 1.2.1996 / 17:41:54 / cg"
!
nextMantissa:radix
|nextChar value factor|
value := 0.
factor := 1.0 / radix.
nextChar := source peek.
[(nextChar notNil and:[nextChar isDigitRadix:radix])] whileTrue:[
value := value + (nextChar digitValue * factor).
factor := factor / radix.
nextChar := source nextPeek
].
^ value
"Created: 1.2.1996 / 18:31:38 / cg"
!
nextNumber
|nextChar value s|
tokenRadix := 10.
value := Integer readFrom:source radix:tokenRadix.
nextChar := source peek.
allowFloatNumbers ifTrue:[
(nextChar == $.) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
value := value asFloat + (self nextMantissa:tokenRadix).
nextChar := source peek
] ifFalse:[
nextChar == (Character cr) ifTrue:[
tokenLineNr := tokenLineNr + 1.
].
peekChar := $.
]
].
((nextChar == $e) or:[nextChar == $E]) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
s := 1.
(nextChar == $+) ifTrue:[
nextChar := source nextPeek
] ifFalse:[
(nextChar == $-) ifTrue:[
nextChar := source nextPeek.
s := s negated
]
].
value := value asFloat
* (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
]
].
].
tokenValue := value.
(value isMemberOf:Float) ifTrue:[
tokenType := #Float
] ifFalse:[
tokenType := #Integer
].
^ tokenType
"Modified: 1.2.1996 / 18:24:07 / cg"
"Created: 1.2.1996 / 18:31:03 / cg"
!
nextSmalltalkNumber
|nextChar value s|
tokenRadix := 10.
value := Integer readFrom:source radix:tokenRadix.
nextChar := source peek.
(nextChar == $r) ifTrue:[
tokenRadix := value.
source next.
s := 1.
source peek == $- ifTrue:[
source next.
s := -1
].
value := Integer readFrom:source radix:tokenRadix.
value := value * s.
nextChar := source peek
].
allowFloatNumbers ifTrue:[
(nextChar == $.) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
value := value asFloat + (self nextMantissa:tokenRadix).
nextChar := source peek
] ifFalse:[
nextChar == (Character cr) ifTrue:[
tokenLineNr := tokenLineNr + 1.
].
peekChar := $.
]
].
((nextChar == $e) or:[nextChar == $E]) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
s := 1.
(nextChar == $+) ifTrue:[
nextChar := source nextPeek
] ifFalse:[
(nextChar == $-) ifTrue:[
nextChar := source nextPeek.
s := s negated
]
].
value := value asFloat
* (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
]
].
].
tokenValue := value.
(value isMemberOf:Float) ifTrue:[
tokenType := #Float
] ifFalse:[
tokenType := #Integer
].
^ tokenType
"Created: 1.2.1996 / 18:19:05 / cg"
"Modified: 1.2.1996 / 18:24:07 / cg"
!
ter := nil.
eolCharacter := Character cr.
eolIsSignificant := false.
actions := DefaultActions shallowCopy.
types := DefaultTypes shallowCopy.
allowFloatNumbers := true.
"Modified: 1.2.1996 / 18:42:54 / cg"
!
nextToken
"return the next token from the source-stream"
|skipping actionBlock|
tokenValue := tokenName := nil.
peekChar notNil ifTrue:[
hereChar := peekChar.
peekChar := peekChar2.
peekChar2 := nil
] ifFalse:[
skipping := true.
[skipping] whileTrue:[
outStream notNil ifTrue:[
[(hereChar := source peek) == Character space] whileTrue:[
source next.
outStream space.
outCol := outCol + 1.
]
] ifFalse:[
hereChar := source skipSeparatorsExceptCR.
].
hereChar isNil ifTrue:[
tokenType := #EOF.
^ tokenType
].
hereChar == eolCharacter ifTrue:[
tokenLineNr := tokenLineNr + 1.
source next.
outStream notNil ifTrue:[
outStream cr.
outCol := 1
].
eolIsSignificant ifTrue:[
tokenType := #EOL.
^ tokenType
]
] ifFalse:[
hereChar == beginCommentCharacter ifTrue:[
"start of a comment"
self skipComment.
hereChar := source peek.
] ifFalse:[
hereChar == eolCommentCharacter ifTrue:[
"start of an eol comment"
self skipEolComment.
hereChar := source peek.
] ifFalse:[
skipping := false
]
]
]
].
hereChar isNil ifTrue:[
tokenType := #EOF.
^ tokenType
]
].
tokenPosition := source position.
types notNil ifTrue:[
tokenType := types at:(hereChar asciiValue).
].
actions notNil ifTrue:[
actionBlock := actions at:tokenType ifAbsent:nil.
actionBlock notNil ifTrue:[
^ actionBlock value:self value:hereChar
]
].
source next.
tokenType isNil ifTrue:[
tokenType := #Error.
].
^ tokenType
"Modified: 1.2.1996 / 18:40:40 / cg"
!
eger readFrom:source radix:10.
tokenRadix := 10.
tokenType := #Integer.
^ tokenType
"Created: 1.2.1996 / 16:37:03 / cg"
"Modified: 1.2.1996 / 16:37:28 / cg"
!
Char := source nextPeek
].
^ value
"Created: 1.2.1996 / 18:31:38 / cg"
!
:= 10.
value := Integer readFrom:source radix:tokenRadix.
nextChar := source peek.
allowFloatNumbers ifTrue:[
(nextChar == $.) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
value := value asFloat + (self nextMantissa:tokenRadix).
nextChar := source peek
] ifFalse:[
nextChar == (Character cr) ifTrue:[
tokenLineNr := tokenLineNr + 1.
].
peekChar := $.
]
].
((nextChar == $e) or:[nextChar == $E]) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
s := 1.
(nextChar == $+) ifTrue:[
nextChar := source nextPeek
] ifFalse:[
(nextChar == $-) ifTrue:[
nextChar := source nextPeek.
s := s negated
]
].
value := value asFloat
* (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
]
].
].
tokenValue := value.
(value isMemberOf:Float) ifTrue:[
tokenType := #Float
] ifFalse:[
tokenType := #Integer
].
^ tokenType
"Modified: 1.2.1996 / 18:24:07 / cg"
"Created: 1.2.1996 / 18:31:03 / cg"
! !
!TokenizedStream methodsFor:'testing'!
extChar == (Character cr) ifTrue:[
tokenLineNr := tokenLineNr + 1.
].
peekChar := $.
]
].
((nextChar == $e) or:[nextChar == $E]) ifTrue:[
nextChar := source nextPeek.
(nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
s := 1.
(nextChar == $+) ifTrue:[
nextChar := source nextPeek
] ifFalse:[
(nextChar == $-) ifTrue:[
nextChar := source nextPeek.
s := s negated
]
].
value := value asFloat
* (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
]
].
].
tokenValue := value.
(value isMemberOf:Float) ifTrue:[
tokenType := #Float
] ifFalse:[
tokenType := #Integer
].
^ tokenType
"Modified: 1.2.1996 / 18:24:07 / cg"
"Created: 1.2.1996 / 18:31:03 / cg"
! !
!TokenizedStream class methodsFor:'documentation'!
version
^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.4 1996-02-01 18:03:48 cg Exp $'
! !
TokenizedStream initialize!