TokenizedStream.st
author Claus Gittinger <cg@exept.de>
Thu, 01 Feb 1996 18:10:11 +0100
changeset 171 ce00d8aab37c
child 172 99b850002359
permissions -rw-r--r--
some more goodies

'From Smalltalk/X, Version:2.10.8 on 1-feb-1996 at 6:09:27 pm'                  !

ReadStream subclass:#TokenizedStream
	instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
		tokenValue tokenRadix hereChar peekChar peekChar2
		beginCommentCharacter endCommentCharacter eolCommentCharacter
		eolCharacter outStream outCol actions types eolIsSignificant'
	classVariableNames:'DefaultActions DefaultTypes'
	poolDictionaries:''
	category:'Streams'
!

!TokenizedStream class methodsFor:'documentation'!

documentation
"
    a first version of a tokenStream.
    This is still being constructed and will finally help a simplified
    Scanner class.
    For now, it may be useful when textual input files are to be read and
    parsed. For example, ascii data files are often in a simple free form format
    which requires some little processing.

    operation:

        a TokenizedStream reads characters from its real input stream
        and dispatches to a toke reading method by the help of an actionArray,
        which is indexed by the characters ascii code.
        By default, the table is setup to only read numbers (integers)
        and identifiers. Whitespace is ignored, and all other characters return
        themself.

    The returned tokens are either symbols (#Identifier / #Integer) or
    characters ($+ $, etc.)
    If its an Identifier, the name is found in tokenName (there is an access method for that).
    If its an Integer, the value is found in tokenValue.

    EndOfLine is returned as #EOL; end of input as #EOF.
    Unrecognized input leads to #Error to be returned.
"
!

examples
"
    simple example:

        |s|

        s := TokenizedStream on:'hello world, how much is 3 + 2'.
        [s atEnd] whileFalse:[
            Transcript showCr:(s next).
        ].


    simple example2:

        |s token|

        s := TokenizedStream on:'foo bar baz  3 + 2'.
        [s atEnd] whileFalse:[
            token := s next.
            token == #Identifier ifTrue:[
                Transcript showCr:(token , ' name=' , s tokenName).
            ] ifFalse:[
                token == #Integer ifTrue:[
                    Transcript showCr:(token , ' value=' , s tokenValue printString).
                ] ifFalse:[
                    Transcript showCr:token.
                ]
            ]
        ].


    reading expressions:

        |s num1 num2|

        s := TokenizedStream on:'
3 + 2
4 + 6
1 + 2
'.
        [s atEnd] whileFalse:[
            s next == #Integer ifTrue:[
                num1 := s tokenValue.
                s next == $+ ifTrue:[
                    s next == #Integer ifTrue:[
                        num2 := s tokenValue.
                        Transcript showCr:num1 printString 
                                          , ' + ' 
                                          , num2 printString 
                                          , ' => ' 
                                          , (num1 + num2) printString.
                    ]
                ]
            ]
        ].


    with eol-comments:

        |s num1 num2|

        s := TokenizedStream on:'
3 + 2
; this is a comment
4 + 6
1 + 2
'.
        s eolCommentCharacter:$;.

        [s atEnd] whileFalse:[
            s next == #Integer ifTrue:[
                num1 := s tokenValue.
                s next == $+ ifTrue:[
                    s next == #Integer ifTrue:[
                        num2 := s tokenValue.
                        Transcript showCr:num1 printString 
                                          , ' + ' 
                                          , num2 printString 
                                          , ' => ' 
                                          , (num1 + num2) printString.
                    ]
                ]
            ]
        ].


    scan /etc/services file:

        |s t service port protocol|

        s := TokenizedStream on:'/etc/services' asFilename readStream.
        s eolCommentCharacter:$#.
        s typeTable at:($- asciiValue) put:#letter.

        [s atEnd] whileFalse:[
            t := s next.
            t == #Identifier ifTrue:[
                service := s tokenName.
                t := s next.
                t == #Integer ifTrue:[
                    port := s tokenValue.
                    s next == $/ ifTrue:[
                        t := s next.
                        t == #Identifier ifTrue:[
                            protocol := s tokenName.
                            Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString).
                        ]
                    ]
                ]
            ].
            s skipToEol
        ]
"
! !

!TokenizedStream class methodsFor:'initialization'!

initialize
    |block|

    DefaultActions := Array new:256.
    DefaultTypes := Array new:256.

    "kludge: action is nextColonOrAssign, but type is special"
    2 to:255 do:[:code |
        DefaultTypes at:code put:(Character value:code).
    ].

    block := [:s :char | s nextInteger].
    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        DefaultTypes at:index put:#digit.
        DefaultActions at:index put:block
    ].

    block := [:s :char | s nextIdentifier].
    ($a asciiValue) to:($z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
        DefaultActions at:index put:block
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        DefaultTypes at:index put:#letter.
        DefaultActions at:index put:block
    ].

    "
     TokenizedStream initialize
    "
! !

!TokenizedStream class methodsFor:'instance creation'!

on:aStream
    ^ self basicNew on:aStream
! !

!TokenizedStream methodsFor:'accessing'!

actionTable
    ^ actions

    "Created: 1.2.1996 / 17:42:00 / cg"
!

beginCommentCharacter:aCharacter
    beginCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:38:01 / cg"
!

endCommentCharacter:aCharacter
    endCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:38:06 / cg"
!

eolCommentCharacter:aCharacter
    eolCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:37:51 / cg"
!

tokenName
    ^ tokenName

    "Created: 1.2.1996 / 17:46:48 / cg"
!

tokenType
    ^ tokenType

    "Created: 1.2.1996 / 17:26:24 / cg"
!

tokenValue
    ^ tokenValue

    "Created: 1.2.1996 / 17:26:30 / cg"
!

typeTable
    ^ types

    "Created: 1.2.1996 / 17:41:54 / cg"
! !

!TokenizedStream methodsFor:'initialization'!

initialize
    tokenLineNr := 1.
    eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
    eolCharacter := Character cr.
    eolIsSignificant := false.

    actions := DefaultActions.
    types := DefaultTypes.

    "Modified: 1.2.1996 / 17:36:56 / cg"
! !

!TokenizedStream methodsFor:'private'!

on:aStringOrStream
    self initialize.

    aStringOrStream isStream ifFalse:[
        source := ReadStream on:aStringOrStream
    ] ifTrue:[
        source := aStringOrStream.
    ].

    "Created: 1.2.1996 / 16:18:34 / cg"
    "Modified: 1.2.1996 / 16:18:47 / cg"
! !

!TokenizedStream methodsFor:'reading'!

next
    ^ self nextToken

    "Created: 1.2.1996 / 17:21:47 / cg"
!

nextIdentifier
    |nextChar string oldString 
     index "{ Class: SmallInteger }"
     max   "{ Class: SmallInteger }" 
     t done|

    nextChar := source peek.
    string := String basicNew:20.
    index := 0.
    max := 10.

    done := false.
    [done] whileFalse:[
        nextChar isNil ifTrue:[
            done := true
        ] ifFalse:[
            t := types at:(nextChar asciiValue).
            done := (t ~~ #letter and:[t ~~ #digit]).
        ].
        done ifFalse:[
            (index == max) ifTrue:[
                oldString := string.
                string := String basicNew:(max * 2).
                string replaceFrom:1 to:max with:oldString.
                max := max * 2
            ].
            index := index + 1.
            string at:index put:nextChar.
            nextChar := source nextPeek
        ]
    ].
    tokenType := #Identifier.
    tokenName := string copyTo:index.
    ^ tokenType

    "Created: 1.2.1996 / 16:35:53 / cg"
    "Modified: 1.2.1996 / 17:51:59 / cg"
!

nextInteger
    tokenValue := Integer readFrom:source radix:10.
    tokenRadix := 10.
    tokenType := #Integer.
    ^ tokenType

    "Created: 1.2.1996 / 16:37:03 / cg"
    "Modified: 1.2.1996 / 16:37:28 / cg"
!

nextString:separator
    |nextChar string pos
     index "{ Class: SmallInteger }"
     len   "{ Class: SmallInteger }"
     inString|

    string := String basicNew:20.
    len := 20.
    index := 1.
    pos := source position.
    source next.
    nextChar := source next.
    inString := true.

    [inString] whileTrue:[
        nextChar isNil ifTrue:[
            self error:'unexpected end-of-input in String'.
            tokenType := #EOF.
            ^ tokenType
        ].
        (nextChar == Character cr) ifTrue:[
            tokenLineNr := tokenLineNr + 1
        ].
        (nextChar == separator) ifTrue:[
            (source peek == separator) ifTrue:[
                source next
            ] ifFalse:[
                inString := false
            ]
        ].
        inString ifTrue:[
            string at:index put:nextChar.
            (index == len) ifTrue:[
                string := string , (String new:len).
                len := len * 2
            ].
            index := index + 1.
            nextChar := source next
        ]
    ].
    tokenValue := string copyTo:(index - 1).
    tokenType := #String.
    ^ tokenType

    "Created: 1.2.1996 / 16:39:48 / cg"
!

nextToken
    "return the next token from the source-stream"

    |skipping actionBlock|

    peekChar notNil ifTrue:[
        hereChar := peekChar.
        peekChar := peekChar2.
        peekChar2 := nil
    ] ifFalse:[
        skipping := true.
        [skipping] whileTrue:[
            outStream notNil ifTrue:[
                [(hereChar := source peek) == Character space] whileTrue:[
                    source next.
                    outStream space. 
                    outCol := outCol + 1.
                ]
            ] ifFalse:[
                hereChar := source skipSeparatorsExceptCR.
            ].
            hereChar isNil ifTrue:[
                tokenType := #EOF.
                ^ tokenType
            ].
            hereChar == eolCharacter ifTrue:[
                tokenLineNr := tokenLineNr + 1.
                source next.
                outStream notNil ifTrue:[
                    outStream cr.
                    outCol := 1
                ].
                eolIsSignificant ifTrue:[
                    tokenType := #EOL.
                    ^ tokenType
                ]
            ] ifFalse:[
                hereChar == beginCommentCharacter ifTrue:[
                    "start of a comment"

                    self skipComment.
                    hereChar := source peek.
                ] ifFalse:[
                    hereChar == eolCommentCharacter ifTrue:[
                        "start of an eol comment"

                        self skipEolComment.
                        hereChar := source peek.
                    ] ifFalse:[
                        skipping := false
                    ]
                ]
            ]
        ].
        hereChar isNil ifTrue:[
            tokenType := #EOF.
            ^ tokenType
        ]
    ].
    tokenPosition := source position.

    actions notNil ifTrue:[
        actionBlock := actions at:(hereChar asciiValue).
        actionBlock notNil ifTrue:[
            ^ actionBlock value:self value:hereChar
        ]
    ].

    types notNil ifTrue:[
        source next.
        tokenType := types at:(hereChar asciiValue).
        tokenType notNil ifTrue:[
            ^ tokenType
        ]
    ].

    tokenType := #Error.
    ^ #Error

    "Modified: 1.2.1996 / 17:39:20 / cg"
!

skipComment
    source next.
    hereChar := source peek.

    [hereChar notNil and:[hereChar ~~ endCommentCharacter]] whileTrue:[
        hereChar == eolCharacter ifTrue:[
            tokenLineNr := tokenLineNr + 1.
        ].
        outStream notNil ifTrue:[
            outStream nextPut:hereChar.
            outCol := outCol + 1
        ].
        hereChar := source nextPeek
    ].

    "Created: 1.2.1996 / 17:35:24 / cg"
    "Modified: 1.2.1996 / 17:37:21 / cg"
!

skipEolComment
    source next.
    self skipToEol

    "Created: 1.2.1996 / 17:34:17 / cg"
    "Modified: 1.2.1996 / 18:06:33 / cg"
!

skipToEol
    hereChar := source peek.

    [hereChar notNil and:[hereChar ~~ eolCharacter]] whileTrue:[
        outStream notNil ifTrue:[
            outStream nextPut:hereChar.
            outCol := outCol + 1
        ].
        hereChar := source nextPeek.
    ].
    tokenLineNr := tokenLineNr + 1.

    "Created: 1.2.1996 / 18:06:09 / cg"
    "Modified: 1.2.1996 / 18:06:36 / cg"
! !

!TokenizedStream methodsFor:'testing'!

atEnd
   ^ source atEnd or:[tokenType == #Error or:[tokenType == #EOF]]

    "Created: 1.2.1996 / 17:21:28 / cg"
    "Modified: 1.2.1996 / 17:30:25 / cg"
! !

!TokenizedStream class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.1 1996-02-01 17:10:11 cg Exp $'
! !
TokenizedStream initialize!