Scanner.st
author claus
Mon, 22 Aug 1994 15:47:03 +0200
changeset 38 7bd25d09a330
parent 33 8985ec2f9e82
child 41 62214c6ca833
permissions -rw-r--r--
*** empty log message ***

"
 COPYRIGHT (c) 1989 by Claus Gittinger
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

Object subclass:#Scanner
       instanceVariableNames:'source 
                              token tokenType tokenPosition tokenValue
                              tokenName tokenLineNr tokenRadix
                              thisChar peekChar
                              requestor exitBlock
                              errorFlag 
                              ignoreErrors ignoreWarnings
                              saveComments currentComments
                              warnSTXSpecialComment
                              outStream outCol'
          classVariableNames:'typeArray ActionArray'
            poolDictionaries:''
                    category:'System-Compiler'
!

Scanner comment:'
COPYRIGHT (c) 1989 by Claus Gittinger
             All Rights Reserved

$Header: /cvs/stx/stx/libcomp/Scanner.st,v 1.11 1994-08-22 13:47:03 claus Exp $
'!

!Scanner class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1989 by Claus Gittinger
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

version
"
$Header: /cvs/stx/stx/libcomp/Scanner.st,v 1.11 1994-08-22 13:47:03 claus Exp $
"
!

documentation
"
    Scanner reads from a stream and returns individual smalltalk tokens
"
! !

!Scanner class methodsFor:'instance creation'!

for:aStringOrStream
    "return a new scanner reading from aStringOrStream"

    ^ (super new) initializeFor:aStringOrStream
! !

!Scanner class methodsFor:'initialization'!

setupActions
    |block|

    ActionArray := Array new:256.
    TypeArray := Array new:256.

    block := [:s :char | s nextNumber].
    ($0 asciiValue) to:($9 asciiValue) do:[:index |
        ActionArray at:index put:block
    ].

    block := [:s :char | s nextIdentifier].
    ($a asciiValue) to:($z asciiValue) do:[:index |
        ActionArray at:index put:block
    ].
    ($A asciiValue) to:($Z asciiValue) do:[:index |
        ActionArray at:index put:block
    ].

    block := [:s :char | s nextSpecial].
    #( $& $- $+ $= $* $/ $\ $< $> $~ $@ $, $? ) do:[:binop |
        TypeArray at:(binop asciiValue) put:#special.
        ActionArray at:(binop asciiValue) put:block
    ].

    block := [:s :char | s nextToken:char].
    #( $; $. $( $) $[ $] $!! $^ $| $_ ) do:[:ch |
        ActionArray at:(ch asciiValue) put:block
    ].

    "kludge: action is characterToken, but type is special"
    TypeArray at:($| asciiValue) put:#special.

    ActionArray at:($' asciiValue) put:[:s :char | s nextString].
    ActionArray at:($$ asciiValue) put:[:s :char | s nextCharacter].
    ActionArray at:($# asciiValue) put:[:s :char | s nextHash].
    ActionArray at:($% asciiValue) put:[:s :char | s nextPrimitive].
    ActionArray at:($: asciiValue) put:[:s :char | s nextColonOrAssign]

    "
     Scanner setupActions
    "
! !

!Scanner methodsFor:'private'!

initializeFor:aStringOrStream
    "initialize -
     if this is the first time, setup character- and action tables"

    |block|

    errorFlag := false.
    tokenLineNr := 1.
    aStringOrStream isStream ifFalse:[
        source := ReadStream on:aStringOrStream
    ] ifTrue:[
        source := aStringOrStream.
    ].
    currentComments := nil.
    saveComments := false.
    ignoreErrors := false.
    ignoreWarnings := false.
    warnSTXSpecialComment := true.

    ActionArray isNil ifTrue:[
        self class setupActions
    ]
!

initialize
    "prepare a scan"

    errorFlag := false.
    tokenLineNr := 1.
    currentComments := nil.
    saveComments := false.
    ignoreErrors := false.
    ignoreWarnings := false.
    warnSTXSpecialComment := true
!

notifying:anObject
    "set the requestor to be notified"

    requestor := anObject
!

ignoreErrors
    "turn off notification of errors"

    ignoreErrors := true
!

ignoreWarnings
    "turn off notification of warnings"

    ignoreWarnings := true
!

backupPosition
    "if reading from a stream, at the end we might have read
     one token too many"

    (tokenType == #EOF) ifFalse:[
        source position:tokenPosition
    ]
!

beginComment
    ^ self
!

endComment:comment
    saveComments ifTrue:[
        currentComments isNil ifTrue:[
            currentComments := OrderedCollection with:comment
        ] ifFalse:[
            currentComments add:comment
        ]
    ].
! !

!Scanner methodsFor:'error handling'!

showErrorMessage:aMessage position:pos
    "show an errormessage on the Transcript"

    ignoreErrors ifFalse:[
        Smalltalk silentLoading == true ifFalse:[
            Transcript showCr:(pos printString , ' ' , aMessage)
        ]
    ]
!

notifyError:aMessage position:position to:endPos
    "notify requestor of an error - if there is no requestor
     put it on the transcript.
     Return the result passed back by the requestor."

    requestor isNil ifTrue:[
        self showErrorMessage:aMessage position:position.
        ^ false
    ].

    ^ requestor error:aMessage position:position to:endPos
!

notifyWarning:aMessage position:position to:endPos
    "notify requestor of an warning - if there is no requestor
     put it on the transcript.
     Return the result passed back by the requestor."

    requestor isNil ifTrue:[
        ignoreWarnings ifFalse:[
            self showErrorMessage:aMessage position:position.
        ].
        ^ false
    ].
    ^ requestor warning:aMessage position:position to:endPos
!

syntaxError:aMessage position:position to:endPos
    "a syntax error happened"

    self notifyError:(' Error:' , aMessage) position:position to:endPos.
    exitBlock notNil ifTrue:[exitBlock value].
    ^ false
!

syntaxError:aMessage position:position
    "a syntax error happened - only start position is known"

    ^ self syntaxError:aMessage position:position to:nil
!

syntaxError:aMessage
    "a syntax error happened - position is not known"

    ^ self syntaxError:aMessage position:tokenPosition
!

warning:aMessage position:position to:endPos
    "a warning"

    ^ self notifyWarning:('Warning: ' , aMessage) position:position to:endPos
!

warning:aMessage position:position
    "a warning - only start position is known"

    ^ self warning:aMessage position:position to:nil
!

warning:aMessage
    "a warning - position is not known"

    ^ self warning:aMessage position:tokenPosition
! !

!Scanner methodsFor:'general scanning'!

scanPositionsFor:aTokenString inString:aSourceString
    "scan aSourceString for occurrances of aTokenString.
     Return a collection of start positions.
     Added for VW compatibility (to support syntax-highlight)."

    |searchType searchName searchValue positions t|

    "
     first, look what kind of token we have to search for
    "
    self initializeFor:(ReadStream on:aTokenString).
    self nextToken.
    searchType := tokenType.
    searchName := tokenName.
    searchValue := tokenValue.

    "
     start the real work ...
    "
    self initializeFor:(ReadStream on:aSourceString).
    positions := OrderedCollection new.

    [(t := self nextToken) ~~ #EOF] whileTrue:[
        searchType == t ifTrue:[
            (searchName isNil or:[tokenName = searchName]) ifTrue:[
                (searchValue isNil or:[tokenValue = searchValue]) ifTrue:[
                    positions add:tokenPosition.
                ]
            ]
        ]
    ].

    ^ positions

    "
     Scanner new scanPositionsFor:'hello' inString:'foo bar hello baz hello' 
     Scanner new scanPositionsFor:'3.14' inString:'foo 3.145 bar hello 3.14 baz hello 3.14' 
     Scanner new scanPositionsFor:'16' inString:'foo 16 bar hello 16r10 baz hello 2r10000' 
    "
! !

!Scanner methodsFor:'reading next token'!

skipComment
    |comment startPos|

    comment := ''.

    self beginComment.
    outStream notNil ifTrue:[
        outStream nextPut:Character doubleQuote.
        outCol := outCol + 1
    ].

    startPos := source position.
    source next.
    thisChar := source peek.

    "special ST/X addition:
     a $/ right after the initial double quote makes it an up-to-end-of-line comment,
     which is very useful to comment out parts of filed-in source code.
     Since this is non-standard, use it in very rare cases only. 
     (maybe the upcoming ansi-standard adds something similar - in this case, I will
      change it without notice)"

    thisChar == $/ ifTrue:[
        [thisChar notNil and:[thisChar ~~ Character cr]] whileTrue:[
            saveComments ifTrue:[
                comment := comment copyWith:thisChar
            ].
            outStream notNil ifTrue:[
                outStream nextPut:thisChar.
                outCol := outCol + 1
            ].
            thisChar := source nextPeek.
        ].
        tokenLineNr := tokenLineNr + 1.
        ignoreWarnings ifFalse:[
            warnSTXSpecialComment ifTrue:[
                self warning:'end-of-line comments are a nonstandard feature of ST/X' 
                     position:startPos to:(source position).
                "
                 only warn once
                "
                warnSTXSpecialComment := false
            ]
        ].
        outStream notNil ifTrue:[
            outStream cr.
            outCol := 1
        ].
    ] ifFalse:[
        [thisChar notNil and:[thisChar ~~ (Character doubleQuote)]] whileTrue:[
            thisChar == (Character cr) ifTrue:[
                tokenLineNr := tokenLineNr + 1.
            ].
            saveComments ifTrue:[
                comment := comment copyWith:thisChar
            ].
            outStream notNil ifTrue:[
                outStream nextPut:thisChar.
                outCol := outCol + 1
            ].
            thisChar := source nextPeek
        ].
        thisChar isNil ifTrue:[
            self warning:'unclosed comment' position:startPos to:(source position)
        ] ifFalse:[
            outStream notNil ifTrue:[
                outStream nextPut:(Character doubleQuote).
                outCol := outCol + 1
            ].
        ]
    ].

    saveComments ifTrue:[
        currentComments isNil ifTrue:[
            currentComments := OrderedCollection with:comment
        ] ifFalse:[
            currentComments add:comment
        ]
    ].

    "skip final dQuote or cr"
    source next.

    self endComment:comment.
!

nextToken
    "return the next token from the source-stream"

    |skipping actionBlock comment|

    peekChar notNil ifTrue:[
        thisChar := peekChar.
        peekChar := nil
    ] ifFalse:[
        skipping := true.
        [skipping] whileTrue:[
            outStream notNil ifTrue:[
                [(thisChar := source peek) == Character space] whileTrue:[
                    source next.
                    outStream space. 
                    outCol := outCol + 1.
                ]
            ] ifFalse:[
                thisChar := source skipSeparatorsExceptCR.
            ].
            thisChar == (Character cr) ifTrue:[
                tokenLineNr := tokenLineNr + 1.
                source next.
                outStream notNil ifTrue:[
                    outStream cr.
                    outCol := 1
                ]
            ] ifFalse:[
                thisChar == (Character doubleQuote) ifTrue:[
                    "start of a comment"

                    self skipComment.
                    thisChar := source peek.
                ] ifFalse:[
                    skipping := false
                ]
            ]
        ].
        thisChar isNil ifTrue:[
            tokenType := #EOF.
            ^ tokenType
        ]
    ].
    tokenPosition := source position.

    actionBlock := ActionArray at:(thisChar asciiValue).
    actionBlock notNil ifTrue:[
        ^ actionBlock value:self value:thisChar
    ].

    self syntaxError:('invalid character: ''' , thisChar asString , ''' ',
                      '(' , thisChar asciiValue printString , ')')
            position:tokenPosition to:tokenPosition.
    tokenType := #Error.
    ^ #Error
!

nextToken:aCharacter
    tokenType := aCharacter.
    source next.
    ^ tokenType
!

nextColonOrAssign
    "colon has been read - look for = to make it an assign"

    (source nextPeek == $=) ifTrue:[
        source next.
        tokenType := $_
    ] ifFalse:[
        tokenType := $:
    ].
    ^ tokenType
!
    
nextSpecial
    "a special character has been read, look for another one.
     also -number is handled here"

    |firstChar secondChar thirdChar string p|

    firstChar := source next.
    secondChar := source peek.
    (firstChar == $-) ifTrue:[
        secondChar isDigit ifTrue:[
            self nextNumber.
            tokenValue := tokenValue negated.
            ^ tokenType
        ]
    ].
    string := firstChar asString.
    secondChar notNil ifTrue:[
        ((TypeArray at:(secondChar asciiValue)) == #special) ifTrue:[
            (secondChar == $-) ifTrue:[
                "special- look if minus belongs to number following"
                p := source position.
                source next.
                thirdChar := source peek.
                source position:p.
                thirdChar isDigit ifTrue:[
                    tokenName := string.
                    tokenType := #BinaryOperator.
                    ^ tokenType
                ]
            ].
            source next.
            string := string copyWith:secondChar
        ].
    ].
    tokenName := string.
    tokenType := #BinaryOperator.
    ^ tokenType
!

nextCharacter
    "a $ has been read - return a character token"

    |nextChar|

    source next.
    nextChar := source next.
    nextChar notNil ifTrue:[
        tokenValue := nextChar.
        tokenType := #Character
    ] ifFalse:[
        tokenType := #EOF
    ].
    ^ tokenType
!

nextMantissa:radix
    |nextChar value factor|

    value := 0.
    factor := 1.0 / radix.
    nextChar := source peek.
    [(nextChar notNil and:[nextChar isDigitRadix:radix])] whileTrue:[
        value := value + (nextChar digitValue * factor).
        factor := factor / radix.
        nextChar := source nextPeek
    ].
    ^ value
!

nextNumber
    |nextChar value s|

    tokenRadix := 10.
    value := Integer readFrom:source radix:tokenRadix.
    nextChar := source peek.
    (nextChar == $r) ifTrue:[
        tokenRadix := value.
        source next.
        s := 1.
        source peek == $- ifTrue:[
            source next.
            s := -1
        ].
        value := Integer readFrom:source radix:tokenRadix.
        value := value * s.
        nextChar := source peek
    ].
    (nextChar == $.) ifTrue:[
        nextChar := source nextPeek.
        (nextChar notNil and:[nextChar isDigitRadix:tokenRadix]) ifTrue:[
            value := value asFloat + (self nextMantissa:tokenRadix).
            nextChar := source peek
        ] ifFalse:[
            nextChar == (Character cr) ifTrue:[
                tokenLineNr := tokenLineNr + 1.
            ].
            peekChar := $.
        ]
    ].
    ((nextChar == $e) or:[nextChar == $E]) ifTrue:[
        nextChar := source nextPeek.
        (nextChar notNil and:[(nextChar isDigitRadix:tokenRadix) or:['+-' includes:nextChar]]) ifTrue:[
            s := 1.
            (nextChar == $+) ifTrue:[
                nextChar := source nextPeek
            ] ifFalse:[
                (nextChar == $-) ifTrue:[
                    nextChar := source nextPeek.
                    s := s negated
                ]
            ].
            value := value asFloat
                     * (10.0 raisedToInteger:((Integer readFrom:source radix:tokenRadix) * s))
        ]
    ].
    tokenValue := value.
    (value isMemberOf:Float) ifTrue:[
        tokenType := #Float
    ] ifFalse:[
        tokenType := #Integer
    ].
    ^ tokenType
!

nextId
    |nextChar string oldString 
     index "{ Class: SmallInteger }"
     max   "{ Class: SmallInteger }" |

    nextChar := source peek.
    string := String new:10.
    index := 0.
    max := 10.
    [true] whileTrue:[
        (nextChar notNil and:[nextChar isAlphaNumeric]) ifFalse:[
            ^ string copyTo:index
        ].
        (index == max) ifTrue:[
            oldString := string.
            string := String new:(max * 2).
            string replaceFrom:1 to:max with:oldString.
            max := max * 2
        ].
        index := index + 1.
        string at:index put:nextChar.
        nextChar := source nextPeek
    ]
!

nextIdentifier
    |nextChar string firstChar|

    string := source nextWord "self nextId".
    nextChar := source peek.
    (nextChar == $:) ifTrue:[
        source next.
        (source peek == $=) ifFalse:[
            tokenName := string copyWith:nextChar.
            tokenType := #Keyword.
            ^ self
        ].
        peekChar := $_
    ].
    tokenName := string.
    firstChar := string at:1.
    (firstChar == $s) ifTrue:[
        (string = 'self') ifTrue:[tokenType := #Self. ^self].
        (string = 'super') ifTrue:[tokenType := #Super. ^self]
    ].
    (firstChar == $n) ifTrue:[
        (string = 'nil') ifTrue:[tokenType := #Nil. ^self]
    ].
    (firstChar == $t) ifTrue:[
        (string = 'true') ifTrue:[tokenType := #True. ^self].
        (string = 'thisContext') ifTrue:[tokenType := #ThisContext. ^self]
    ].
    (firstChar == $f) ifTrue:[
        (string = 'false') ifTrue:[tokenType := #False. ^self]
    ].
    tokenType := #Identifier.
    ^ tokenType
!

nextPrimitive
    |nextChar inPrimitive string 
     index "{ Class: SmallInteger }"
     len   "{ Class: SmallInteger }" |

    nextChar := source nextPeek.
    string := String new:500.
    len := 500.
    index := 1.
    (nextChar == ${) ifTrue:[
        nextChar := source nextPeek.
        inPrimitive := true.
        [inPrimitive] whileTrue:[
            [nextChar == $%] whileFalse:[
                string at:index put:nextChar.
                (index == len) ifTrue:[
                    string := string , (String new:len).
                    len := len * 2
                ].
                index := index + 1.
                nextChar := source next
            ].
            (source peek == $}) ifTrue:[
                inPrimitive := false
            ] ifFalse:[
                string at:index put:nextChar.
                (index == len) ifTrue:[
                    string := string , (String new:len).
                    len := len * 2
                ].
                index := index + 1.
                nextChar := source next
            ]
        ].
        source next.
        tokenValue := string copyTo:(index - 1).
        tokenType := #Primitive.
        tokenLineNr := tokenLineNr + (tokenValue occurrencesOf:(Character cr)).
        ^ tokenType
    ].

    "a % alone is a binary operator"
    tokenName := '%'.
    tokenType := #BinaryOperator.
    ^ tokenType.
"
    self syntaxError:('invalid character: ''' , nextChar asString , '''')
            position:tokenPosition to:(tokenPosition + 1).
    ^ #Error
"
!

nextHash
    |nextChar string|

    nextChar := source nextPeek.
    nextChar notNil ifTrue:[
        nextChar isAlphaNumeric ifTrue:[
            string := ''.
            [nextChar notNil and:[nextChar isAlphaNumeric]] whileTrue:[
                string := string , (source nextWord "self nextId").
                nextChar := source peek.
                (nextChar == $:) ifFalse:[
                    tokenValue := string asSymbol.
                    tokenType := #Symbol.
                    ^ tokenType
                ].
                string := string copyWith:nextChar.
                nextChar := source nextPeek
            ].
            tokenValue := string asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ].
        (nextChar == $( ) ifTrue:[
            source next.
            tokenType := #HashLeftParen.
            ^ tokenType
        ].
        (nextChar == $[ ) ifTrue:[
            "it seems that ST-80 supports Constant ByteArrays as #[...]
             (seen in a PD program)"
            source next.
            tokenType := #HashLeftBrack.
            ^ tokenType
        ].
        (nextChar == $' ) ifTrue:[
            "it seems that ST-80 supports arbitrary symbols as #'...'
             (seen in a PD program)"
            self nextString.
            tokenValue := tokenValue asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ].
        ((TypeArray at:(nextChar asciiValue)) == #special) ifTrue:[
            string := source next asString.
            nextChar := source peek.
            nextChar notNil ifTrue:[
                ((TypeArray at:(nextChar asciiValue)) == #special) ifTrue:[
                    source next.
                    string := string copyWith:nextChar
                ]
            ].
            tokenValue := string asSymbol.
            tokenType := #Symbol.
            ^ tokenType
        ]
    ].
    "this allows hash to be used as binop -
     I dont know, if this is correct ..."

    tokenName := '#'.
    tokenType := BinaryOperator.
    ^ tokenType
"
    self syntaxError:'unexpected end-of-input in Symbol'
            position:tokenPosition to:(tokenPosition + 1).
    ^ #Error
"
!

nextString
    |nextChar string pos
     index "{ Class: SmallInteger }"
     len   "{ Class: SmallInteger }"
     inString|

    string := String new:20.
    len := 20.
    index := 1.
    pos := source position.
    source next.
    nextChar := source next.
    inString := true.

    [inString] whileTrue:[
        nextChar isNil ifTrue:[
            self syntaxError:'unexpected end-of-input in String'
                    position:pos to:(source position - 1).
            tokenType := #EOF.
            ^ tokenType
        ].
        (nextChar == Character cr) ifTrue:[
            tokenLineNr := tokenLineNr + 1
        ].
        (nextChar == Character quote) ifTrue:[
            (source peek == Character quote) ifTrue:[
                source next
            ] ifFalse:[
                inString := false
            ]
        ].
        inString ifTrue:[
            string at:index put:nextChar.
            (index == len) ifTrue:[
                string := string , (String new:len).
                len := len * 2
            ].
            index := index + 1.
            nextChar := source next
        ]
    ].
    tokenValue := string copyTo:(index - 1).
    tokenType := #String.
    ^ tokenType
! !