Scanner.st
author claus
Sat, 11 Dec 1993 02:09:49 +0100
changeset 7 6c2bc76f0b8f
parent 4 f6fd83437415
child 10 73e97b6175c4
permissions -rw-r--r--
*** empty log message ***

"
 COPYRIGHT (c) 1989 by Claus Gittinger
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

Object subclass:#Scanner
       instanceVariableNames:'source 
                              token tokenType tokenPosition tokenValue
                              tokenName tokenLineNr
                              thisChar peekChar
                              requestor exitBlock
                              errorFlag ignoreErrors
                              saveComments currentComments'
          classVariableNames:'typeArray actionArray'
            poolDictionaries:''
                    category:'System-Compiler'
!

Scanner comment:'

COPYRIGHT (c) 1989 by Claus Gittinger
             All Rights Reserved

Scanner reads from a stream and returns individual smalltalk tokens
$Header: /cvs/stx/stx/libcomp/Scanner.st,v 1.4 1993-12-11 01:09:49 claus Exp $
'!

!Scanner class methodsFor:'instance creation'!

for:aStream
    "return a new scanner reading from aStream"

    ^ (super new) initializeFor:aStream
! !

!Scanner methodsFor:'private'!

initializeFor:aStream
    "initialize -
     if this is the first time, setup character- and action tables"

    |block|

    errorFlag := false.
    tokenLineNr := 1.
    source := aStream.
    currentComments := nil.
    saveComments := false.
    ignoreErrors := false.

    actionArray isNil ifTrue:[
        actionArray := Array new:256.
        typeArray := Array new:256.

        block := [:s :char | s nextNumber].
        ($0 asciiValue) to:($9 asciiValue) do:[:index |
            actionArray at:index put:block
        ].

        block := [:s :char | s nextIdentifier].
        ($a asciiValue) to:($z asciiValue) do:[:index |
            actionArray at:index put:block
        ].
        ($A asciiValue) to:($Z asciiValue) do:[:index |
            actionArray at:index put:block
        ].

        block := [:s :char | s nextSpecial].
        #( $& $- $+ $= $* $/ $\ $< $> $~ $@ $, $? ) do:[:binop |
            typeArray at:(binop asciiValue) put:#special.
            actionArray at:(binop asciiValue) put:block
        ].

        block := [:s :char | s nextToken:char].
        #( $; $. $( $) $[ $] $!! $^ $| $_ ) do:[:ch |
            actionArray at:(ch asciiValue) put:block
        ].

        "kludge: action is characterToken, but type is special"
        typeArray at:($| asciiValue) put:#special.

        actionArray at:($' asciiValue) put:[:s :char | s nextString].
        actionArray at:($$ asciiValue) put:[:s :char | s nextCharacter].
        actionArray at:($# asciiValue) put:[:s :char | s nextHash].
        actionArray at:($% asciiValue) put:[:s :char | s nextPrimitive].
        actionArray at:($: asciiValue) put:[:s :char | s nextColonOrAssign]
    ]
!

initialize
    "prepare a scan"

    errorFlag := false.
    tokenLineNr := 1.
    currentComments := nil.
    saveComments := false.
    ignoreErrors := false.
!

notifying:anObject
    "set the requestor to be notified"

    requestor := anObject
!

ignoreErrors
    "turn off notification of errors"

    ignoreErrors := true
!

backupPosition
    "if reading from a stream, at the end we might have read
     one token too many"

    (tokenType == #EOF) ifFalse:[
        source position:tokenPosition
    ]
! !

!Scanner methodsFor:'error handling'!

showErrorMessage:aMessage position:pos
    "show an errormessage on the Transcript"

    ignoreErrors ifFalse:[
        Transcript showCr:(pos printString , ' ' , aMessage)
    ]
!

notifyError:aMessage position:position to:endPos
    "notify requestor of an error - if there is no requestor
     put it on the transcript.
     Return the result passed back by the requestor."

    requestor isNil ifTrue:[
        self showErrorMessage:aMessage position:position.
        ^ false
    ].

    ^ requestor error:aMessage position:position to:endPos
!

notifyWarning:aMessage position:position to:endPos
    "notify requestor of an warning - if there is no requestor
     put it on the transcript.
     Return the result passed back by the requestor."

    requestor isNil ifTrue:[
        self showErrorMessage:aMessage position:position.
        ^ false
    ].
    ^ requestor warning:aMessage position:position to:endPos
!

syntaxError:aMessage position:position to:endPos
    "a syntax error happened"

    self notifyError:(' Error:' , aMessage) position:position to:endPos.
    exitBlock notNil ifTrue:[exitBlock value].
    ^ false
!

syntaxError:aMessage position:position
    "a syntax error happened - only start position is known"

    ^ self syntaxError:aMessage position:position to:nil
!

syntaxError:aMessage
    "a syntax error happened - position is not known"

    ^ self syntaxError:aMessage position:tokenPosition
!

warning:aMessage position:position to:endPos
    "a warning"

    ^ self notifyWarning:(' Warning:' , aMessage) position:position to:endPos
!

warning:aMessage position:position
    "a warning - only start position is known"

    ^ self warning:aMessage position:position to:nil
!

warning:aMessage
    "a warning - position is not known"

    ^ self warning:aMessage position:tokenPosition
! !

!Scanner methodsFor:'reading next token'!

skipComment
    |comment startPos|

    comment := ''.

    startPos := source position.
    source next.
    thisChar := source peek.
    [thisChar notNil and:[thisChar ~~ (Character doubleQuote)]] whileTrue:[
        thisChar == (Character cr) ifTrue:[
            tokenLineNr := tokenLineNr + 1.
        ].
        saveComments ifTrue:[
            comment := comment copyWith:thisChar
        ].
        source next.
        thisChar := source peek
    ].
    saveComments ifTrue:[
        currentComments isNil ifTrue:[
            currentComments := OrderedCollection with:comment
        ] ifFalse:[
            currentComments add:comment
        ]
    ].

    thisChar isNil ifTrue:[
        self warning:'unclosed comment' position:startPos to:(source position)
    ].
    "skip final dQuote"
    source next.
!

nextToken
    "return the next token from the source-stream"

    |skipping actionBlock comment|

    peekChar notNil ifTrue:[
        thisChar := peekChar.
        peekChar := nil
    ] ifFalse:[
        skipping := true.
        [skipping] whileTrue:[
            thisChar := source skipSeparatorsExceptCR.
            thisChar == (Character cr) ifTrue:[
                tokenLineNr := tokenLineNr + 1.
                source next
            ] ifFalse:[
                thisChar == (Character doubleQuote) ifTrue:[
                    "start of a comment"

                    self skipComment.
                    thisChar := source peek.
                ] ifFalse:[
                    skipping := false
                ]
            ]
        ].
        thisChar isNil ifTrue:[
            tokenType := #EOF.
            ^ tokenType
        ]
    ].
    tokenPosition := source position.

    actionBlock := actionArray at:(thisChar asciiValue).
    actionBlock notNil ifTrue:[
        ^ actionBlock value:self value:thisChar
    ].

    self syntaxError:('invalid character: ''' , thisChar asString , ''' ',
                      '(' , thisChar asciiValue printString , ')')
            position:tokenPosition to:tokenPosition.
    tokenType := #Error.
    ^ #Error
!

nextToken:aCharacter
    tokenType := aCharacter.
    source next.
    ^ tokenType
!

nextColonOrAssign
    "colon has been read - look for = to make it an assign"

    (source nextPeek == $=) ifTrue:[
        source next.
        tokenType := $_
    ] ifFalse:[
        tokenType := $:
    ].
    ^ tokenType
!
    
nextSpecial
    "a special character has been read, look for another one.
     also -number is handled here"

    |firstChar secondChar thirdChar string p|

    firstChar := source next.
    secondChar := source peek.
    (firstChar == $-) ifTrue:[
        secondChar isDigit ifTrue:[
            self nextNumber.
            tokenValue := tokenValue negated.
            ^ tokenType
        ]
    ].
    string := firstChar asString.
    ((typeArray at:(secondChar asciiValue)) == #special) ifTrue:[
        (secondChar == $-) ifTrue:[
            "special- look if minus belongs to number following"
            p := source position.
            source next.
            thirdChar := source peek.
            source position:p.
            thirdChar isDigit ifTrue:[
                tokenName := string.
                tokenType := #BinaryOperator.
                ^ tokenType
            ]
        ].
        source next.
        string := string copyWith:secondChar
    ].
    tokenName := string.
    tokenType := #BinaryOperator.
    ^ tokenType
!

nextCharacter
    "a $ has been read - return a character token"

    |nextChar|

    source next.
    nextChar := source next.
    nextChar notNil ifTrue:[
        tokenValue := nextChar.
        tokenType := #Character
    ] ifFalse:[
        tokenType := #EOF
    ].
    ^ tokenType
!

nextMantissa:radix
    |nextChar value factor|

    value := 0.
    factor := 1.0 / radix.
    nextChar := source peek.
    [(nextChar notNil and:[nextChar isDigitRadix:radix])] whileTrue:[
        value := value + (nextChar digitValue * factor).
        factor := factor / radix.
        nextChar := source nextPeek
    ].
    ^ value
!

nextNumber
    |nextChar value radix s|

    radix := 10.
    value := Integer readFrom:source radix:radix.
    nextChar := source peek.
    (nextChar == $r) ifTrue:[
        radix := value.
        source next.
        value := Integer readFrom:source radix:radix.
        nextChar := source peek
    ].
    (nextChar == $.) ifTrue:[
        nextChar := source nextPeek.
        (nextChar notNil and:[nextChar isDigitRadix:radix]) ifTrue:[
            value := value asFloat + (self nextMantissa:radix).
            nextChar := source peek
        ] ifFalse:[
            nextChar == (Character cr) ifTrue:[
                tokenLineNr := tokenLineNr + 1.
            ].
            peekChar := $.
        ]
    ].
    (nextChar == $e) ifTrue:[
        nextChar := source nextPeek.
        (nextChar notNil and:[(nextChar isDigitRadix:radix) or:['+-' includes:nextChar]]) ifTrue:[
            s := 1.
            (nextChar == $+) ifTrue:[
                nextChar := source nextPeek
            ] ifFalse:[
                (nextChar == $-) ifTrue:[
                    nextChar := source nextPeek.
                    s := s negated
                ]
            ].
            value := value asFloat
                     * (10.0 raisedToInteger:((Integer readFrom:source radix:radix) * s))
        ]
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType
!

nextId
    |nextChar string oldString 
     index "{ Class: SmallInteger }"
     max   "{ Class: SmallInteger }" |

    nextChar := source peek.
    string := String new:10.
    index := 0.
    max := 10.
    [true] whileTrue:[
        (nextChar notNil and:[nextChar isAlphaNumeric]) ifFalse:[
            ^ string copyFrom:1 to:index
        ].
        (index == max) ifTrue:[
            oldString := string.
            string := String new:(max * 2).
            string replaceFrom:1 to:max with:oldString.
            max := max * 2
        ].
        index := index + 1.
        string at:index put:nextChar.
        nextChar := source nextPeek
    ]
!

nextIdentifier
    |nextChar string firstChar|

    string := source nextWord "self nextId".
    nextChar := source peek.
    (nextChar == $:) ifTrue:[
        source next.
        (source peek == $=) ifFalse:[
            tokenName := string copyWith:nextChar.
            tokenType := #Keyword.
            ^ self
        ].
        peekChar := $_
    ].
    firstChar := string at:1.
    (firstChar == $s) ifTrue:[
        (string = 'self') ifTrue:[tokenType := #Self. ^self].
        (string = 'super') ifTrue:[tokenType := #Super. ^self]
    ].
    (firstChar == $n) ifTrue:[
        (string = 'nil') ifTrue:[tokenType := #Nil. ^self]
    ].
    (firstChar == $t) ifTrue:[
        (string = 'true') ifTrue:[tokenType := #True. ^self].
        (string = 'thisContext') ifTrue:[tokenType := #ThisContext. ^self]
    ].
    (firstChar == $f) ifTrue:[
        (string = 'false') ifTrue:[tokenType := #False. ^self]
    ].
    tokenName := string.
    tokenType := #Identifier.
    ^ tokenType
!

nextPrimitive
    |nextChar inPrimitive string 
     index "{ Class: SmallInteger }"
     len   "{ Class: SmallInteger }" |

    nextChar := source nextPeek.
    string := String new:500.
    len := 500.
    index := 1.
    (nextChar == ${) ifTrue:[
        nextChar := source nextPeek.
        inPrimitive := true.
        [inPrimitive] whileTrue:[
            [nextChar == $%] whileFalse:[
                string at:index put:nextChar.
                (index == len) ifTrue:[
                    string := string , (String new:len).
                    len := len * 2
                ].
                index := index + 1.
                nextChar := source next
            ].
            (source peek == $}) ifTrue:[
                inPrimitive := false
            ] ifFalse:[
                string at:index put:nextChar.
                (index == len) ifTrue:[
                    string := string , (String new:len).
                    len := len * 2
                ].
                index := index + 1.
                nextChar := source next
            ]
        ].
        source next.
        tokenValue := string copyFrom:1 to:(index - 1).
        tokenType := #Primitive.
        tokenLineNr := tokenLineNr + (tokenValue occurrencesOf:(Character cr)).
        ^ tokenType
    ].

    "a % alone is a binary operator"
    tokenName := '%'.
    tokenType := #BinaryOperator.
    ^ tokenType.
"
    self syntaxError:('invalid character: ''' , nextChar asString , '''')
            position:tokenPosition to:(tokenPosition + 1).
    ^ #Error
"
!

nextHash
    |nextChar string|

    nextChar := source nextPeek.
    nextChar notNil ifTrue:[
        nextChar isAlphaNumeric ifTrue:[
            string := ''.
            [nextChar notNil and:[nextChar isAlphaNumeric]] whileTrue:[
                string := string , (source nextWord "self nextId").
                nextChar := source peek.
                (nextChar == $:) ifFalse:[
                    tokenValue := string asSymbol.
                    tokenType := #Symbol.
                    ^ tokenType
                ].
                string := string copyWith:nextChar.
                nextChar := source nextPeek
            ].
            tokenValue := string asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ].
        (nextChar == $( ) ifTrue:[
            source next.
            tokenType := #HashLeftParen.
            ^ tokenType
        ].
        (nextChar == $[ ) ifTrue:[
            "it seems that ST-80 supports Constant ByteArrays as #[...]
             (seen in a PD program)"
            source next.
            tokenType := #HashLeftBrack.
            ^ tokenType
        ].
        (nextChar == $' ) ifTrue:[
            "it seems that ST-80 supports arbitrary symbols as #'...'
             (seen in a PD program)"
            self nextString.
            tokenValue := tokenValue asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ].
        ((typeArray at:(nextChar asciiValue)) == #special) ifTrue:[
            string := source next asString.
            nextChar := source peek.
            nextChar notNil ifTrue:[
                ((typeArray at:(nextChar asciiValue)) == #special) ifTrue:[
                    source next.
                    string := string copyWith:nextChar
                ]
            ].
            tokenValue := string asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ]
    ].
    "this allows hash to be used as binop -
     I dont know, if this is correct ..."

    tokenName := '#'.
    tokenType := BinaryOperator.
    ^ tokenType
"
    self syntaxError:'unexpected end-of-input in Symbol'
            position:tokenPosition to:(tokenPosition + 1).
    ^ #Error
"
!

nextString
    |nextChar string pos
     index "{ Class: SmallInteger }"
     len   "{ Class: SmallInteger }"
     inString|

    string := String new:20.
    len := 20.
    index := 1.
    pos := source position.
    source next.
    nextChar := source next.
    inString := true.

    [inString] whileTrue:[
        nextChar isNil ifTrue:[
            self syntaxError:'unexpected end-of-input in String'
                    position:pos to:(source position - 1).
            tokenType := #EOF.
            ^ tokenType
        ].
        (nextChar == Character cr) ifTrue:[
            tokenLineNr := tokenLineNr + 1
        ].
        (nextChar == Character quote) ifTrue:[
            (source peek == Character quote) ifTrue:[
                source next
            ] ifFalse:[
                inString := false
            ]
        ].
        inString ifTrue:[
            string at:index put:nextChar.
            (index == len) ifTrue:[
                string := string , (String new:len).
                len := len * 2
            ].
            index := index + 1.
            nextChar := source next
        ]
    ].
    tokenValue := string copyFrom:1 to:(index - 1).
    tokenType := #String.
    ^ tokenType
! !