SmallSense__SmalltalkParser.st
author Jan Vrany <jan.vrany@fit.cvut.cz>
Wed, 18 Sep 2013 01:29:45 +0100
changeset 91 920e30d788dc
parent 85 d6a3fdbd87db
child 96 12fe1a59dfd1
permissions -rw-r--r--
Minor fixes in inferencer.

"{ Package: 'jv:smallsense' }"

"{ NameSpace: SmallSense }"

SyntaxHighlighter subclass:#SmalltalkParser
	instanceVariableNames:'error'
	classVariableNames:''
	poolDictionaries:''
	category:'SmallSense-Smalltalk'
!


!SmalltalkParser methodsFor:'error handling'!

parseError:message position:startPos to:endPos

    error := ParseErrorNode new 
                errorString: message;
                errorToken:  (token notNil ifTrue:[token asString] ifFalse:[nil]);
                startPosition: startPos endPosition: endPos.
    ^error

    "Created: / 27-11-2011 / 09:35:19 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 15-08-2013 / 12:21:40 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

syntaxError:message position:startPos to:endPos

    ^self parseError:message position:startPos to:endPos

    "Created: / 27-11-2011 / 09:45:03 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser methodsFor:'parsing'!

blockStatementList
    "parse a blocks statementlist; return a node-tree, nil or #Error"

    |thisStatement prevStatement firstStatement eMsg blockStart lastErrorPosition |

    blockStart := tokenPosition.

    (tokenType == $] ) ifTrue:[^ nil].

    thisStatement := self statement.
    (thisStatement == #Error) ifTrue:[^ #Error].
    firstStatement := thisStatement.
    [tokenType == $] ] whileFalse:[
        (tokenType == $.) ifFalse:[
            (tokenType == #EOF) ifTrue:[
                | errnode |
                errnode := self syntaxError:'missing '']'' in block' position:blockStart to:(source position + 1).
                errnode children: (Array with: firstStatement).
                ^errnode
            ].

            (tokenType == $) ) ifTrue:[
                eMsg := 'missing '']'' or bad '')'' in block'
            ] ifFalse:[
                eMsg := 'missing ''.'' between statements (i.e. ''' , tokenType printString , '''-token unexpected)'
            ].

            lastErrorPosition == tokenPosition ifTrue:[
                "/ Failed to recover, still on the same token, give up.
                ^ error
            ].
            lastErrorPosition := tokenPosition.

            "/ Report error...
            self syntaxError:eMsg position:thisStatement startPosition to:tokenPosition.
            "/ ...an try to recover...

            " Situation 1: ================== 
                number isEven ifTrue:[
                    inst
                    number := number + 1.
                ]
            In this case, the `number` token is already consumed, leaving 
            UnaryNode with selector #number as an expression of last statement. 
            The current token is #:=. Try to recover by setting stream position
            just past of `inst` token"
            (token == #':=' 
                and:[thisStatement expression isMessage 
                    and:[thisStatement expression numArgs == 0
                        and:[thisStatement expression receiver endPosition notNil]]]) ifTrue:[
                            source position: thisStatement expression receiver endPosition.
                            thisStatement expression: thisStatement expression receiver.
                            self nextToken.
                        ]
        ] ifTrue:[
            self nextToken.
        ].

        prevStatement := thisStatement.

        tokenType == $] ifTrue:[
            "
            *** I had a warning here (since it was not defined
            *** in the blue-book; but PD-code contains a lot of
            *** code with periods at the end so that the warnings
            *** became annoying

            self warning:'period after last statement in block'.
            "
            self markBracketAt:tokenPosition.
            ^ self statementListRewriteHookFor:firstStatement
        ].
        thisStatement := self statement.
        (thisStatement == #Error) ifTrue:[^ #Error].
        prevStatement nextStatement:thisStatement
    ].
    self markBracketAt:tokenPosition.
    ^ self statementListRewriteHookFor:firstStatement

    "Created: / 15-08-2013 / 12:16:15 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 26-08-2013 / 11:33:21 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

parseExpressionWithSelf:anObject notifying:someOne ignoreErrors:ignoreErrors ignoreWarnings:ignoreWarnings inNameSpace:aNameSpaceOrNil

    |tree token|

    aNameSpaceOrNil notNil ifTrue:[
        self currentNameSpace:aNameSpaceOrNil
    ].
    self setSelf:anObject.
    self notifying:someOne.
    self ignoreErrors:ignoreErrors.
    self ignoreWarnings:ignoreWarnings.
    token := self nextToken.
    (token == $^) ifTrue:[
        self nextToken.
    ].
    (token == #EOF) ifTrue:[
        ^ nil
    ].
    "/tree := self expression.
    tree := self statementList.    
    (self errorFlag or:[tree == #Error]) ifTrue:[^ #Error].
    ^ tree

    "Created: / 14-12-1999 / 15:11:37 / cg"
    "Created: / 09-07-2011 / 22:23:25 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser methodsFor:'parsing-expressions'!

keywordExpression
    "parse a keyword-expression; return a node-tree, nil or #Error.

     keywordExpression ::= binaryexpression
                           | { KEYWORD-PART binaryExpression }
    "

    |receiver expr|

    receiver := self binaryExpression.
    (receiver == #Error) ifTrue:[^ #Error].
    (tokenType == #EOF) ifTrue:[^ receiver].
    tokenType == $] ifTrue:[^ receiver].
    tokenType == $) ifTrue:[^ receiver].
    expr := self keywordExpressionFor:receiver.

    "/ expr could be an assignment as well, here
    (ignoreWarnings or:[ignoreErrors]) ifFalse:[
        "/ for a better error message, in case of a missing period in the previous message,
        "/    <expr> <missing period> foo := ...
        "/ would be parsed as unary message foo; detect this here, instead of high up in the calling hierarchy,
        "/ where it is difficult to provide a reasonable error message
        tokenType == #':=' ifTrue:[
            | positionOfPeriod exprLast |

            "/ Find the very last unary send node, Consider:
            "/    x := 2
            "/    y := false
            "/ 
            "/    x := 2 between: 0 and: 10
            "/    y := false  
            "/ 
            "/    x := 2 between: 0 and: self max
            "/    y := false  

            exprLast := expr.
            [ exprLast isMessage and: [ exprLast isUnaryMessage not ] ] whileTrue:[
                exprLast := exprLast args last.
            ].
            (exprLast isMessage and: [ exprLast isUnaryMessage ] ) ifTrue:[
                positionOfPeriod := exprLast receiver positionToInsertPeriodForStatementSeparation
            ].
            positionOfPeriod notNil ifTrue:[
                "/Try to recover
                expr := ParseErrorNode new
                        startPosition:expr startPosition endPosition: positionOfPeriod - 1;
                        errorString: ('":=" unexpected. Probably missing "." in previous expression.');
                        children: (Array with: expr);
                        yourself.        
                source position: positionOfPeriod.
                tokenType := $.
            ]
        ].
    ].

    ^ expr

    "Created: / 16-09-2013 / 17:23:09 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 16-09-2013 / 23:18:56 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

primary
    | nodeOrError |

    nodeOrError := super primary.
    ^ (nodeOrError == #Error and:[error notNil]) ifTrue:[
        error
    ] ifFalse:[
        nodeOrError 
    ]

    "Created: / 19-08-2013 / 14:07:12 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

primary_expression
    "parse a parentized expression primary; return a node-tree, or raise an Error."

    |pos val eMsg|

    pos := tokenPosition.

    self nextToken.
    val := self expression.
    (val == #Error) ifTrue:[^ #Error].
    (tokenType == $) ) ifFalse:[
        | errnode |
        tokenType isCharacter ifTrue:[
            eMsg := 'missing '')'' (i.e. ''' , tokenType asString , ''' unexpected)'.
        ] ifFalse:[
            eMsg := 'missing '')'''.
        ].
        errnode := self syntaxError:eMsg withCRs position:pos to:tokenPosition.
        errnode children: (Array with: val).
        ^ errnode
    ].
    self markParenthesisAt:tokenPosition.
    parenthesisLevel := parenthesisLevel - 1.
    self nextToken.
    (self noAssignmentAllowed:'Invalid assignment to an expression' at:pos) ifFalse:[
        ^ #Error
    ].
    val parenthesized:true.
    ^ val

    "Created: / 15-08-2013 / 15:23:59 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser methodsFor:'parsing-tweaks'!

_blockStatementList
    "parse a blocks statementlist; return a node-tree, nil or #Error"
    
    |thisStatement prevStatement firstStatement eMsg blockStart|

    blockStart := tokenPosition.
    (tokenType == $]) ifTrue:[
        ^ nil
    ].          
    thisStatement := self statement.
    (thisStatement == #Error) ifTrue:[
        ^ #Error
    ].
    firstStatement := thisStatement.
    [
        tokenType == $] or:[ tokenType == #EOF ]
    ] whileFalse:[
        (tokenType == $.) ifFalse:[   
            (tokenType == #EOF) ifTrue:[
                "    self syntaxError:'missing '']'' in block' position:blockStart to:(source position1Based).
                    ^ #Error."
            ].
            (tokenType == $)) ifTrue:[
                eMsg := 'missing '']'' or bad '')'' in block'
            ] ifFalse:[
                eMsg := 'missing ''.'' between statements (i.e. ''' 
                            , tokenType printString , '''-token unexpected)'
            ].          
             "self syntaxError:eMsg position:blockStart to:tokenPosition.
             ^ #Error"
        ].

        prevStatement := thisStatement.    
        (eMsg isNil ) ifTrue:[              
            self nextToken.
        ].
        tokenType == $] ifTrue:[
            "
             *** I had a warning here (since it was not defined
             *** in the blue-book; but PD-code contains a lot of
             *** code with periods at the end so that the warnings
             *** became annoying

             self warning:'period after last statement in block'."
            self markBracketAt:tokenPosition.
            ^ firstStatement
        ].
        thisStatement := self statement.
        (thisStatement == #Error) ifTrue:[
            ^ #Error
        ].
        prevStatement nextStatement:thisStatement.
        (eMsg notNil) ifTrue:[
            self nextToken.
        ].
        eMsg := nil.
    ].
    self markBracketAt:tokenPosition.
    ^ self statementListRewriteHookFor:firstStatement

    "Created: / 05-08-2013 / 14:56:12 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

_primary
    "parse a primary-expression; return a node-tree, nil or #Error.
     This also cares for namespace-access-pathes."
    
    | node |
    node := super primary.
    "/If an error occured, return the error node"
    node == #Error ifTrue:[
        self assert: error notNil description: 'Parse error occured but no error node.'.
        node := error. error := nil.
    ].
    ^node

    "Created: / 05-08-2013 / 14:56:20 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

_statementList
    "parse a statementlist; return a node-tree, nil or #Error.
     Statements must be separated by periods.

     statementList ::= <statement>
                       | <statementList> . <statement>"
    
    |thisStatement prevStatement firstStatement periodPos prevExpr|

    thisStatement := self statement.
    (thisStatement == #Error) ifTrue:[
        self breakPoint: #jv.            
        ^ #Error
    ].
    firstStatement := thisStatement.
    [ tokenType == #EOF ] whileFalse:[
        prevExpr := thisStatement expression.
        (prevExpr notNil 
            and:[ prevExpr isMessage and:[ thisStatement isReturnNode not ] ]) 
                ifTrue:[
                    (#( #'=' #'==' ) includes:prevExpr selector) ifTrue:[
                        self 
                            warning:'useless computation - mistyped assignment (i.e. did you mean '':='') ?'
                            position:prevExpr selectorPosition
                    ].
                ].
        periodPos := tokenPosition.

        (tokenType == $. or:[ firstStatement = thisStatement and:[firstStatement expression isErrorNode] ]) ifTrue:[    
            self nextToken.
        ].
        tokenType == $. ifTrue:[
            self emptyStatement.
        ].
        (tokenType == $]) ifTrue:[
            currentBlock isNil ifTrue:[
                
            ] ifFalse:[
                ^ self statementListRewriteHookFor:firstStatement
            ].
        ].
        (tokenType == #EOF) ifTrue:[
            currentBlock notNil ifTrue:[
                "self parseError:''']'' expected (block nesting error)'."
            ] ifFalse:[
                ^ self statementListRewriteHookFor:firstStatement
            ].
        ].
        prevStatement := thisStatement.
        prevStatement isReturnNode ifTrue:[
            self warning:'statements after return' position:tokenPosition
        ].
        thisStatement := self statement.
        (thisStatement == #Error) ifTrue:[
            self breakPoint: #jv.           
            ^ #Error
        ].
        (thisStatement expression isNil or:[thisStatement expression isErrorNode]) ifTrue:[
            self nextToken.
        ].

        prevStatement nextStatement:thisStatement
    ].
    ^ self statementListRewriteHookFor:firstStatement

    "Created: / 05-08-2013 / 14:56:05 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser methodsFor:'private'!

findNameSpaceWith:varName
    | ns |

    "The super #findNameSpaceWith: checks whether the the gloval named 'varName' exists,
     if not, returns the current namespace which is then prepended to 'varName'.

     Here we have to deal with uncomplete global names, so trick the caller by returning
     nil if the partially typed global name starts with current namespace prefix."

    classToCompileFor notNil ifTrue:[
        ns := classToCompileFor topNameSpace.
        (varName = ns name) ifTrue:[
            ^ nil
        ]
    ].

    ^ super findNameSpaceWith:varName

    "Created: / 28-07-2013 / 13:49:09 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser methodsFor:'syntax coloring'!

markBracketAt:pos

    sourceText isNil ifTrue:[^self].
    ^super markBracketAt:pos

    "Created: / 03-04-2011 / 22:39:56 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

markFrom:pos1 to:pos2 withEmphasis:fontEmp color:clrIn

    sourceText isNil ifTrue:[^self].
    super markFrom:pos1 to:pos2 withEmphasis:fontEmp color:clrIn

    "Created: / 03-04-2011 / 22:24:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

markFrom:pos1 to:pos2 withEmphasis:fontEmp color:fgClr1 ifNil:fgClr2 backgroundColor:bgClr

    sourceText isNil ifTrue:[^self].
    super markFrom:pos1 to:pos2 withEmphasis:fontEmp color:fgClr1 ifNil:fgClr2 backgroundColor:bgClr

    "Created: / 14-02-2012 / 11:08:24 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !

!SmalltalkParser class methodsFor:'documentation'!

version_HG

    ^ '$Changeset: <not expanded> $'
!

version_SVN
    ^ '$Id: SmallSenseParser.st 7922 2012-03-09 07:57:34Z vranyj1 $'
! !