TokenizedStream.st
author Claus Gittinger <cg@exept.de>
Thu, 01 Feb 1996 19:03:48 +0100
changeset 174 64e81af0a50b
parent 173 f8c6732b927c
child 175 ec15c4883886
permissions -rw-r--r--
allo negative numbers

ReadStream subclass:#TokenizedStream
	instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
		tokenValue tokenRadix hereChar peekChar peekChar2
		beginCommentCharacter endCommentCharacter eolCommentCharacter
		eolCharacter outStream outCol actions types eolIsSignificant
		allowFloatNumbers numbersAreSigned'
	classVariableNames:'DefaultActions DefaultTypes'
	poolDictionaries:''
	category:'Streams'
!

!TokenizedStream class methodsFor:'documentation'!

For:'documentation'!

examples
"
    simple example:

        |s|

        s := TokenizedStream on:'hello world, how much is 3 + 2'.
        [s atEnd] whileFalse:[
            Transcript showCr:(s next).
        ].


    simple example2:

        |s token|

        s := TokenizedStream on:'foo bar baz  3 + 2'.
        [s atEnd] whileFalse:[
            token := s next.
            token == #Identifier ifTrue:[
                Transcript showCr:(token , ' name=' , s tokenName).
            ] ifFalse:[
                token == #Integer ifTrue:[
                    Transcript showCr:(token , ' value=' , s tokenValue printString).
                ] ifFalse:[
                    Transcript showCr:token.
                ]
            ]
        ].


    reading expressions:

        |s num1 num2|

        s := TokenizedStream on:'
3 + 2
4 + 6
1 + 2
'.
        [s atEnd] whileFalse:[
            s next == #Integer ifTrue:[
                num1 := s tokenValue.
                s next == $+ ifTrue:[
                    s next == #Integer ifTrue:[
                        num2 := s tokenValue.
                        Transcript showCr:num1 printString 
                                          , ' + ' 
                                          , num2 printString 
                                          , ' => ' 
                                          , (num1 + num2) printString.
                    ]
                ]
            ]
        ].


    with eol-comments:

        |s num1 num2|

        s := TokenizedStream on:'
3 + 2
; this is a comment
4 + 6
1 + 2
'.
        s eolCommentCharacter:$;.

        [s atEnd] whileFalse:[
            s next == #Integer ifTrue:[
                num1 := s tokenValue.
                s next == $+ ifTrue:[
                    s next == #Integer ifTrue:[
                        num2 := s tokenValue.
                        Transcript showCr:num1 printString 
                                          , ' + ' 
                                          , num2 printString 
                                          , ' => ' 
                                          , (num1 + num2) printString.
                    ]
                ]
            ]
        ].

    allowing float & negative numbers (the default):

        |s|

        s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0'.
        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString, ' value=' , s tokenValue printString).
        ].


    not allowing float numbers :

        |s|

        s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0 '.
        s allowFloatNumbers:false.

        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString).
        ].


    not allowing negative numbers :

        |s|

        s := TokenizedStream on:'1.23 4.56 7 8 9 -5 5 -5.0 5.0'.
        s numbersAreSigned:false.

        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString).
        ].


    no radix numbers (the default):

        |s|

        s := TokenizedStream on:'0x1234 16r1234'.

        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
        ].


    C-style radix numbers:

        |s|

        s := TokenizedStream on:'0x1234 16r1234'.
        s actionTable at:#digit put:[:s :char | s nextCNumber].

        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
        ].


    smalltalk-style radix numbers:

        |s|

        s := TokenizedStream on:'0x1234 16r1234'.
        s actionTable at:#digit put:[:s :char | s nextSmalltalkNumber].

        [s atEnd] whileFalse:[
            s next.
            Transcript showCr:(s tokenType displayString , ' value= ' , s tokenValue printString , ' name=' , s tokenName displayString).
        ].


    scan the '/etc/services' file:

        |s t service port protocol|

        s := TokenizedStream on:'/etc/services' asFilename readStream.
        s eolCommentCharacter:$#.
        s typeTable at:($- asciiValue) put:#letter.

        [s atEnd] whileFalse:[
            t := s next.
            t == #Identifier ifTrue:[
                service := s tokenName.
                t := s next.
                t == #Integer ifTrue:[
                    port := s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
! !

!TokenizedStream class methodsFor:'initialization'!

initialize
    DefaultActions := IdentityDictionary new.
    DefaultTypes := Array new:256.

    "kludge: action is nextColonOrAssign, but type is special"
    2 to:255 do:[:code |
        DefaultTypes at:code put:(Character value:code).
    ].

    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        DefaultTypes at:index put:#digit.
    ].

    ($a asciiValue) to:($z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].

    DefaultActions at:#letter put:[:s :char | s nextIdentifier].
    DefaultActions at:#digit  put:[:s :char | s nextNumber].
    DefaultActions at:$-  put:[:s :char | s checkForNumberAfterSign].

    "
     TokenizedStream initialize
    "
! !

!TokenizedStream class methodsFor:'instance creation'!

s typeTable at:($- asciiValue) put:#letter.

        [s atEnd] whileFalse:[
            t := s next.
            t == #Identifier ifTrue:[
                service := s tokenName.
                t := s next.
                t == #Integer ifTrue:[
                    port := s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
! !

!TokenizedStream methodsFor:'accessing'!

t := s next.
            t == #Identifier ifTrue:[
                service := s tokenName.
                t := s next.
                t == #Integer ifTrue:[
                    port := s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
!

allowFloatNumbers:aBoolean
    "if false, floating numbers are not read; a period is returned as
     a separate token. If true (the default), floating point numbers are allowed."

    allowFloatNumbers := aBoolean

    "Modified: 1.2.1996 / 18:14:27 / cg"
    "Created: 1.2.1996 / 18:27:41 / cg"
!

vice := s tokenName.
                t := s next.
                t == #Integer ifTrue:[
                    port := s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
!

s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
!

t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
!

anscript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
!

numbersAreSigned:aBoolean
    "if false, minus signs preceeding numbers are ignored.
     if true (the default) they are recognized"

    numbersAreSigned := aBoolean

    "Modified: 1.2.1996 / 18:14:27 / cg"
    "Created: 1.2.1996 / 18:56:34 / cg"
!

'!

ltTypes := Array new:256.

    "kludge: action is nextColonOrAssign, but type is special"
    2 to:255 do:[:code |
        DefaultTypes at:code put:(Character value:code).
    ].

    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        DefaultTypes at:index put:#digit.
    ].

    ($a asciiValue) to:($z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].

    DefaultActions at:#letter put:[:s :char | s nextIdentifier].
    DefaultActions at:#digit  put:[:s :char | s nextNumber].

    "
     TokenizedStream initialize
    "
!

ut type is special"
    2 to:255 do:[:code |
        DefaultTypes at:code put:(Character value:code).
    ].

    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        DefaultTypes at:index put:#digit.
    ].

    ($a asciiValue) to:($z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].

    DefaultActions at:#letter put:[:s :char | s nextIdentifier].
    DefaultActions at:#digit  put:[:s :char | s nextNumber].

    "
     TokenizedStream initialize
    "
!

e put:(Character value:code).
    ].

    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        DefaultTypes at:index put:#digit.
    ].

    ($a asciiValue) to:($z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
    ].

    DefaultActions at:#letter put:[:s :char | s nextIdentifier].
    DefaultActions at:#digit  put:[:s :char | s nextNumber].

    "
     TokenizedStream initialize
    "
! !

!TokenizedStream methodsFor:'initialization'!

initialize
    tokenLineNr := 1.
    eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
    eolCharacter := Character cr.
    eolIsSignificant := false.

    actions := DefaultActions shallowCopy.
    types := DefaultTypes shallowCopy.
    allowFloatNumbers := true.
    numbersAreSigned := true.

    "Modified: 1.2.1996 / 19:00:50 / cg"
! !

!TokenizedStream methodsFor:'private'!

put:[:s :char | s nextNumber].

    "
     TokenizedStream initialize
    "
! !

!TokenizedStream methodsFor:'reading'!

checkForNumberAfterSign
    |next|

    source next.
    numbersAreSigned ifTrue:[
        next := source peek .
        (next notNil and:[(types at:next asciiValue) == #digit]) ifTrue:[
            (actions at:#digit) value:self value:next.
            tokenValue := tokenValue negated.
        ]
    ]

    "Modified: 1.2.1996 / 19:02:46 / cg"
!

rs:aBoolean
    "if false, floating numbers are not read; a period is returned as
     a separate token. If true (the default), floating point numbers are allowed."

    allowFloatNumbers := aBoolean

    "Modified: 1.2.1996 / 18:14:27 / cg"
    "Created: 1.2.1996 / 18:27:41 / cg"
!

nextCNumber
    |nextChar value s|

    tokenRadix := 10.
    source peek == $0 ifTrue:[
        source next.
        source peek == $x ifTrue:[
            source next.
            tokenRadix := 16.
        ] ifFalse:[
            tokenRadix := 8
        ]
    ].

    value := Integer readFrom:source radix:tokenRadix.
    nextChar := source peek.

    (allowFloatNumbers and:[tokenRadix == 10]) ifTrue:[
        (nextChar == $.) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
                value := value asFloat + (self nextMantissa:tokenRadix).
                nextChar := source peek
            ] ifFalse:[
                nextChar == (Character cr) ifTrue:[
                    tokenLineNr := tokenLineNr + 1.
                ].
                peekChar := $.
            ]
        ].
        ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
                s := 1.
                (nextChar == $+) ifTrue:[
                    nextChar := source nextPeek
                ] ifFalse:[
                    (nextChar == $-) ifTrue:[
                        nextChar := source nextPeek.
                        s := s negated
                    ]
                ].
                value := value asFloat
                         * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
            ]
        ].
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType

    "Created: 1.2.1996 / 18:26:27 / cg"
!

returned as
     a separate token. If true (the default), floating point numbers are allowed."

    allowFloatNumbers := aBoolean

    "Modified: 1.2.1996 / 18:14:27 / cg"
    "Created: 1.2.1996 / 18:27:41 / cg"
!

peTable
    ^ types

    "Created: 1.2.1996 / 17:41:54 / cg"
!

nextMantissa:radix
    |nextChar value factor|

    value := 0.
    factor := 1.0 / radix.
    nextChar := source peek.
    [(nextChar notNil and:[nextChar isDigitRadix:radix])] whileTrue:[
        value := value + (nextChar digitValue * factor).
        factor := factor / radix.
        nextChar := source nextPeek
    ].
    ^ value

    "Created: 1.2.1996 / 18:31:38 / cg"
!

nextNumber
    |nextChar value s|

    tokenRadix := 10.
    value := Integer readFrom:source radix:tokenRadix.
    nextChar := source peek.
    allowFloatNumbers ifTrue:[
        (nextChar == $.) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
                value := value asFloat + (self nextMantissa:tokenRadix).
                nextChar := source peek
            ] ifFalse:[
                nextChar == (Character cr) ifTrue:[
                    tokenLineNr := tokenLineNr + 1.
                ].
                peekChar := $.
            ]
        ].
        ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
                s := 1.
                (nextChar == $+) ifTrue:[
                    nextChar := source nextPeek
                ] ifFalse:[
                    (nextChar == $-) ifTrue:[
                        nextChar := source nextPeek.
                        s := s negated
                    ]
                ].
                value := value asFloat
                         * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
            ]
        ].
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType

    "Modified: 1.2.1996 / 18:24:07 / cg"
    "Created: 1.2.1996 / 18:31:03 / cg"
!

nextSmalltalkNumber
    |nextChar value s|

    tokenRadix := 10.
    value := Integer readFrom:source radix:tokenRadix.
    nextChar := source peek.
    (nextChar == $r) ifTrue:[
        tokenRadix := value.
        source next.
        s := 1.
        source peek == $- ifTrue:[
            source next.
            s := -1
        ].
        value := Integer readFrom:source radix:tokenRadix.
        value := value * s.
        nextChar := source peek
    ].
    allowFloatNumbers ifTrue:[
        (nextChar == $.) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
                value := value asFloat + (self nextMantissa:tokenRadix).
                nextChar := source peek
            ] ifFalse:[
                nextChar == (Character cr) ifTrue:[
                    tokenLineNr := tokenLineNr + 1.
                ].
                peekChar := $.
            ]
        ].
        ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
                s := 1.
                (nextChar == $+) ifTrue:[
                    nextChar := source nextPeek
                ] ifFalse:[
                    (nextChar == $-) ifTrue:[
                        nextChar := source nextPeek.
                        s := s negated
                    ]
                ].
                value := value asFloat
                         * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
            ]
        ].
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType

    "Created: 1.2.1996 / 18:19:05 / cg"
    "Modified: 1.2.1996 / 18:24:07 / cg"
!

ter := nil.
    eolCharacter := Character cr.
    eolIsSignificant := false.

    actions := DefaultActions shallowCopy.
    types := DefaultTypes shallowCopy.
    allowFloatNumbers := true.

    "Modified: 1.2.1996 / 18:42:54 / cg"
!

nextToken
    "return the next token from the source-stream"

    |skipping actionBlock|

    tokenValue := tokenName := nil.

    peekChar notNil ifTrue:[
        hereChar := peekChar.
        peekChar := peekChar2.
        peekChar2 := nil
    ] ifFalse:[
        skipping := true.
        [skipping] whileTrue:[
            outStream notNil ifTrue:[
                [(hereChar := source peek) == Character space] whileTrue:[
                    source next.
                    outStream space. 
                    outCol := outCol + 1.
                ]
            ] ifFalse:[
                hereChar := source skipSeparatorsExceptCR.
            ].
            hereChar isNil ifTrue:[
                tokenType := #EOF.
                ^ tokenType
            ].
            hereChar == eolCharacter ifTrue:[
                tokenLineNr := tokenLineNr + 1.
                source next.
                outStream notNil ifTrue:[
                    outStream cr.
                    outCol := 1
                ].
                eolIsSignificant ifTrue:[
                    tokenType := #EOL.
                    ^ tokenType
                ]
            ] ifFalse:[
                hereChar == beginCommentCharacter ifTrue:[
                    "start of a comment"

                    self skipComment.
                    hereChar := source peek.
                ] ifFalse:[
                    hereChar == eolCommentCharacter ifTrue:[
                        "start of an eol comment"

                        self skipEolComment.
                        hereChar := source peek.
                    ] ifFalse:[
                        skipping := false
                    ]
                ]
            ]
        ].
        hereChar isNil ifTrue:[
            tokenType := #EOF.
            ^ tokenType
        ]
    ].
    tokenPosition := source position.

    types notNil ifTrue:[
        tokenType := types at:(hereChar asciiValue).
    ].

    actions notNil ifTrue:[
        actionBlock := actions at:tokenType ifAbsent:nil.
        actionBlock notNil ifTrue:[
            ^ actionBlock value:self value:hereChar
        ]
    ].

    source next.
    tokenType isNil ifTrue:[
        tokenType := #Error.
    ].
    ^ tokenType

    "Modified: 1.2.1996 / 18:40:40 / cg"
!

eger readFrom:source radix:10.
    tokenRadix := 10.
    tokenType := #Integer.
    ^ tokenType

    "Created: 1.2.1996 / 16:37:03 / cg"
    "Modified: 1.2.1996 / 16:37:28 / cg"
!

Char := source nextPeek
    ].
    ^ value

    "Created: 1.2.1996 / 18:31:38 / cg"
!

:= 10.
    value := Integer readFrom:source radix:tokenRadix.
    nextChar := source peek.
    allowFloatNumbers ifTrue:[
        (nextChar == $.) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
                value := value asFloat + (self nextMantissa:tokenRadix).
                nextChar := source peek
            ] ifFalse:[
                nextChar == (Character cr) ifTrue:[
                    tokenLineNr := tokenLineNr + 1.
                ].
                peekChar := $.
            ]
        ].
        ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
                s := 1.
                (nextChar == $+) ifTrue:[
                    nextChar := source nextPeek
                ] ifFalse:[
                    (nextChar == $-) ifTrue:[
                        nextChar := source nextPeek.
                        s := s negated
                    ]
                ].
                value := value asFloat
                         * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
            ]
        ].
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType

    "Modified: 1.2.1996 / 18:24:07 / cg"
    "Created: 1.2.1996 / 18:31:03 / cg"
! !

!TokenizedStream methodsFor:'testing'!

extChar == (Character cr) ifTrue:[
                    tokenLineNr := tokenLineNr + 1.
                ].
                peekChar := $.
            ]
        ].
        ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
            nextChar := source nextPeek.
            (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
                s := 1.
                (nextChar == $+) ifTrue:[
                    nextChar := source nextPeek
                ] ifFalse:[
                    (nextChar == $-) ifTrue:[
                        nextChar := source nextPeek.
                        s := s negated
                    ]
                ].
                value := value asFloat
                         * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
            ]
        ].
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType

    "Modified: 1.2.1996 / 18:24:07 / cg"
    "Created: 1.2.1996 / 18:31:03 / cg"
! !

!TokenizedStream class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.4 1996-02-01 18:03:48 cg Exp $'
! !
TokenizedStream initialize!