Scanner.st
changeset 4438 bd363f7ca282
parent 4433 ad2645bee258
child 4441 a834368786b5
equal deleted inserted replaced
4437:5155327c7c91 4438:bd363f7ca282
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 1989 by Claus Gittinger
     4  COPYRIGHT (c) 1989 by Claus Gittinger
     3 	      All Rights Reserved
     5 	      All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
   141 !
   143 !
   142 
   144 
   143 extendedBinarySelectorCharacters
   145 extendedBinarySelectorCharacters
   144     "return a collection of characters which are optionally allowed in binary selectors"
   146     "return a collection of characters which are optionally allowed in binary selectors"
   145 
   147 
   146     "/ ^ '±×·÷«»'.
   148     "/ ^ '±×·÷«»'.
   147     ^ String
   149     ^ String
   148 	with:(Character value:16rB1)  "/ plus-minus
   150 	with:(Character value:16rB1)  "/ plus-minus
   149 	with:(Character value:16rD7)  "/ times
   151 	with:(Character value:16rD7)  "/ times
   150 	with:(Character value:16rB7)  "/ centered dot
   152 	with:(Character value:16rB7)  "/ centered dot
   151 	with:(Character value:16rF7)  "/ divide
   153 	with:(Character value:16rF7)  "/ divide
  1854 
  1856 
  1855     "Modified: 23.5.1997 / 12:16:48 / cg"
  1857     "Modified: 23.5.1997 / 12:16:48 / cg"
  1856 !
  1858 !
  1857 
  1859 
  1858 warnParagraphAt:position
  1860 warnParagraphAt:position
  1859     "warn about §-character in an identifier"
  1861     "warn about §-character in an identifier"
  1860 
  1862 
  1861     ignoreWarnings ifFalse:[
  1863     ignoreWarnings ifFalse:[
  1862 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1864 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1863 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1865 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1864 		self
  1866 		self
  1865 		    warning:'§-characters in identifiers/symbols are nonportable'
  1867 		    warning:'§-characters in identifiers/symbols are nonportable'
  1866 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1868 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1867 		    position:position to:position.
  1869 		    position:position to:position.
  1868 		"
  1870 		"
  1869 		 only warn once (per method)
  1871 		 only warn once (per method)
  1870 		"
  1872 		"
  3004         ^ self xnextString:$' escapeStyle:string
  3006         ^ self xnextString:$' escapeStyle:string
  3005     ].    
  3007     ].    
  3006 
  3008 
  3007     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3009     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3008     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  3010     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  3009     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  3011     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  3010     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  3012     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  3011         pos := source position + 1.
  3013         pos := source position + 1.
  3012         nextChar == $_ ifTrue:[
  3014         nextChar == $_ ifTrue:[
  3013             self warnUnderscoreAt:pos.
  3015             self warnUnderscoreAt:pos.
  3014         ] ifFalse:[
  3016         ] ifFalse:[
  3015             nextChar == $$ ifTrue:[
  3017             nextChar == $$ ifTrue:[
  3016                 self warnDollarAt:pos.
  3018                 self warnDollarAt:pos.
  3017             ] ifFalse:[
  3019             ] ifFalse:[
  3018                 nextChar == $§ ifTrue:[
  3020                 nextChar == $§ ifTrue:[
  3019                     self warnParagraphAt:pos.
  3021                     self warnParagraphAt:pos.
  3020                 ] ifFalse:[
  3022                 ] ifFalse:[
  3021                     "/ self warnNationalCharacterAt:pos.
  3023                     "/ self warnNationalCharacterAt:pos.
  3022                 ]
  3024                 ]
  3023             ]
  3025             ]
  3033                     string := string , source nextAlphaNumericWord.
  3035                     string := string , source nextAlphaNumericWord.
  3034                     nextChar := source peekOrNil.
  3036                     nextChar := source peekOrNil.
  3035                 ].
  3037                 ].
  3036                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3038                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3037                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3039                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3038                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3040                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3039                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3041                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3040             ]
  3042             ]
  3041         ].
  3043         ].
  3042     ].
  3044     ].
  3043 
  3045 
  3598     "return the next token from the source-stream"
  3600     "return the next token from the source-stream"
  3599 
  3601 
  3600     |skipping actionBlock v ch tok|
  3602     |skipping actionBlock v ch tok|
  3601 
  3603 
  3602     source isPositionable ifTrue:[
  3604     source isPositionable ifTrue:[
  3603 	tokenLastEndPosition := source position+1.
  3605         tokenLastEndPosition := source position+1.
  3604     ].
  3606     ].
  3605 
  3607 
  3606     [
  3608     [
  3607 	peekChar notNil ifTrue:[
  3609         peekChar notNil ifTrue:[
  3608 	    "/ kludge - should be called peekSym.
  3610             "/ kludge - should be called peekSym.
  3609 	    "/ used when xlating Foo.Bar into Foo::Bar
  3611             "/ used when xlating Foo.Bar into Foo::Bar
  3610 	    peekChar isSymbol ifTrue:[
  3612             peekChar isSymbol ifTrue:[
  3611 		token := nil.
  3613                 token := nil.
  3612 		tokenType := peekChar.
  3614                 tokenType := peekChar.
  3613 		peekChar := nil.
  3615                 peekChar := nil.
  3614 		^ tokenType
  3616                 ^ tokenType
  3615 	    ].
  3617             ].
  3616 
  3618 
  3617 	    (peekChar isSeparator or:[ peekChar codePoint == 16rFEFF ]) ifTrue:[
  3619             (peekChar isSeparator or:[ peekChar codePoint == 16rFEFF ]) ifTrue:[
  3618 		peekChar == (Character cr) ifTrue:[
  3620                 peekChar == (Character cr) ifTrue:[
  3619 		    lineNr := lineNr + 1.
  3621                     lineNr := lineNr + 1.
  3620 		].
  3622                 ].
  3621 		hereChar := peekChar.
  3623                 hereChar := peekChar.
  3622 		peekChar := peekChar2.
  3624                 peekChar := peekChar2.
  3623 		peekChar2 := nil.
  3625                 peekChar2 := nil.
  3624 		(hereChar == Character cr) ifTrue:[
  3626                 (hereChar == Character cr) ifTrue:[
  3625 		    (self eolIsWhiteSpace) ifFalse:[
  3627                     (self eolIsWhiteSpace) ifFalse:[
  3626 			token := nil.
  3628                         token := nil.
  3627 			tokenType := #EOL.
  3629                         tokenType := #EOL.
  3628 			^ tokenType
  3630                         ^ tokenType
  3629 		    ].
  3631                     ].
  3630 		].
  3632                 ].
  3631 	    ].
  3633             ].
  3632 	].
  3634         ].
  3633 
  3635 
  3634 	peekChar notNil ifTrue:[
  3636         peekChar notNil ifTrue:[
  3635 	    ch := peekChar.
  3637             ch := peekChar.
  3636 	    peekChar := peekChar2.
  3638             peekChar := peekChar2.
  3637 	    peekChar2 := nil.
  3639             peekChar2 := nil.
  3638 	    hereChar := nil.
  3640             hereChar := nil.
  3639 	] ifFalse:[
  3641         ] ifFalse:[
  3640 	    skipping := true.
  3642             skipping := true.
  3641 	    [skipping] whileTrue:[
  3643             [skipping] whileTrue:[
  3642 
  3644 
  3643 		outStream notNil ifTrue:[
  3645                 outStream notNil ifTrue:[
  3644 		    [
  3646                     [
  3645 			hereChar := source peekOrNil.
  3647                         hereChar := source peekOrNil.
  3646 			(hereChar notNil
  3648                         (hereChar notNil
  3647 			    and:[(hereChar == Character space) or:[hereChar isSeparator]])
  3649                             and:[(hereChar == Character space) or:[hereChar isSeparator]])
  3648 		    ] whileTrue:[
  3650                     ] whileTrue:[
  3649 			source next.
  3651                         source next.
  3650 			outStream space.
  3652                         outStream space.
  3651 			outCol := outCol + 1.
  3653                         outCol := outCol + 1.
  3652 			hereChar == (Character cr) ifTrue:[
  3654                         hereChar == (Character cr) ifTrue:[
  3653 			    self eolIsWhiteSpace ifFalse:[
  3655                             self eolIsWhiteSpace ifFalse:[
  3654 				source isPositionable ifTrue:[
  3656                                 source isPositionable ifTrue:[
  3655 				    tokenPosition := source position.
  3657                                     tokenPosition := source position.
  3656 				].
  3658                                 ].
  3657 				token := nil.
  3659                                 token := nil.
  3658 				tokenType := #EOL.
  3660                                 tokenType := #EOL.
  3659 				^ tokenType
  3661                                 ^ tokenType
  3660 			    ].
  3662                             ].
  3661 			]
  3663                         ]
  3662 		    ]
  3664                     ]
  3663 		] ifFalse:[
  3665                 ] ifFalse:[
  3664 		    hereChar := source skipSeparatorsExceptCR.
  3666                     hereChar := source skipSeparatorsExceptCR.
  3665 		].
  3667                 ].
  3666 
  3668 
  3667 		hereChar isNil ifTrue:[
  3669                 hereChar isNil ifTrue:[
  3668 		    skipping := false
  3670                     skipping := false
  3669 		] ifFalse:[
  3671                 ] ifFalse:[
  3670 		    hereChar == (Character cr) ifTrue:[
  3672                     hereChar == (Character cr) ifTrue:[
  3671 			lineNr := lineNr + 1.
  3673                         lineNr := lineNr + 1.
  3672 			source next.
  3674                         source next.
  3673 			outStream notNil ifTrue:[
  3675                         outStream notNil ifTrue:[
  3674 			    outStream cr.
  3676                             outStream cr.
  3675 			    outCol := 1
  3677                             outCol := 1
  3676 			].
  3678                         ].
  3677 			self eolIsWhiteSpace ifFalse:[
  3679                         self eolIsWhiteSpace ifFalse:[
  3678 			    source isPositionable ifTrue:[
  3680                             source isPositionable ifTrue:[
  3679 				tokenPosition := source position.
  3681                                 tokenPosition := source position.
  3680 			    ].
  3682                             ].
  3681 			    token := nil.
  3683                             token := nil.
  3682 			    tokenType := #EOL.
  3684                             tokenType := #EOL.
  3683 			    ^ tokenType
  3685                             ^ tokenType
  3684 			].
  3686                         ].
  3685 		    ] ifFalse:[
  3687                     ] ifFalse:[
  3686 			hereChar == (Character return) ifTrue:[
  3688                         hereChar == (Character return) ifTrue:[
  3687 			    outStream notNil ifTrue:[
  3689                             outStream notNil ifTrue:[
  3688 				outStream nextPut:hereChar.
  3690                                 outStream nextPut:hereChar.
  3689 				outCol := 1
  3691                                 outCol := 1
  3690 			    ].
  3692                             ].
  3691 			    source next.
  3693                             source next.
  3692 			] ifFalse:[
  3694                         ] ifFalse:[
  3693 			    (self isCommentCharacter:hereChar) ifTrue:[
  3695                             (self isCommentCharacter:hereChar) ifTrue:[
  3694 				"start of a comment"
  3696                                 "start of a comment"
  3695 
  3697 
  3696 				self skipComment.
  3698                                 self skipComment.
  3697 				hereChar := source peekOrNil.
  3699                                 hereChar := source peekOrNil.
  3698 			    ] ifFalse:[
  3700                             ] ifFalse:[
  3699 				skipping := false
  3701                                 skipping := false
  3700 			    ]
  3702                             ]
  3701 			]
  3703                         ]
  3702 		    ]
  3704                     ]
  3703 		].
  3705                 ].
  3704 	    ].
  3706             ].
  3705 	    hereChar isNil ifTrue:[
  3707             hereChar isNil ifTrue:[
  3706 		source isPositionable ifTrue:[
  3708                 source isPositionable ifTrue:[
  3707 		    tokenPosition := source position + 1.
  3709                     tokenPosition := source position + 1.
  3708 		].
  3710                 ].
  3709 		token := nil.
  3711                 token := nil.
  3710 		tokenType := #EOF.
  3712                 tokenType := #EOF.
  3711 		^ tokenType
  3713                 ^ tokenType
  3712 	    ].
  3714             ].
  3713 	    ch := hereChar
  3715             ch := hereChar
  3714 	].
  3716         ].
  3715 	source isPositionable ifTrue:[
  3717         source isPositionable ifTrue:[
  3716 	    tokenPosition := source position + 1.
  3718             tokenPosition := source position + 1.
  3717 	].
  3719         ].
  3718 	tokenLineNr := lineNr.
  3720         tokenLineNr := lineNr.
  3719 
  3721 
  3720 	(v := ch codePoint) == 0 ifTrue:[
  3722         (v := ch codePoint) == 0 ifTrue:[
  3721 	    v := Character space codePoint
  3723             v := Character space codePoint
  3722 	].
  3724         ].
  3723 	v <= 16rFF ifTrue:[
  3725         v <= 16rFF ifTrue:[
  3724 	    actionBlock := actionArray at:v.
  3726             actionBlock := actionArray at:v.
  3725 	] ifFalse:[
  3727         ] ifFalse:[
  3726 	    actionBlock := unicodeActions at:v ifAbsent:nil
  3728             actionBlock := unicodeActions at:v ifAbsent:nil
  3727 	].
  3729         ].
  3728 	actionBlock notNil ifTrue:[
  3730         actionBlock notNil ifTrue:[
  3729 	    tok := actionBlock value:self value:ch.
  3731             tok := actionBlock value:self value:ch.
  3730 	    tok notNil ifTrue:[
  3732             tok notNil ifTrue:[
  3731 		^ tok
  3733                 ^ tok
  3732 	    ].
  3734             ].
  3733 	    "/ a nil token means: continue reading
  3735             "/ a nil token means: continue reading
  3734 	] ifFalse:[
  3736         ] ifFalse:[
  3735 	    (ch isNationalLetter and:[parserFlags allowNationalCharactersInIdentifier]) ifTrue:[
  3737             ((ch isNationalLetter and:[parserFlags allowNationalCharactersInIdentifier])
  3736 		tok := self nextIdentifier.
  3738               or:[ (ch == $§ and:[parserFlags allowParagraphInIdentifier]) 
  3737 		tok notNil ifTrue:[
  3739               or:[ (ch isGreekLetter and:[parserFlags allowGreekCharactersInIdentifier]) 
  3738 		    ^ tok
  3740             ]]) ifTrue:[
  3739 		].
  3741                 tok := self nextIdentifier.
  3740 		"/ a nil token means: continue reading
  3742                 tok notNil ifTrue:[
  3741 	    ] ifFalse:[
  3743                     ^ tok
  3742 		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
  3744                 ].
  3743 		    tok := self nextIdentifier.
  3745                 "/ a nil token means: continue reading
  3744 		    tok notNil ifTrue:[
  3746             ] ifFalse:[
  3745 			^ tok
  3747                 ^ self invalidCharacter:ch.
  3746 		    ].
  3748             ].
  3747 		    "/ a nil token means: continue reading
  3749         ]
  3748 		] ifFalse:[
       
  3749 		    ^ self invalidCharacter:ch.
       
  3750 		].
       
  3751 	    ].
       
  3752 	]
       
  3753     ] loop.
  3750     ] loop.
  3754 
  3751 
  3755     "Modified: / 13-09-1995 / 12:56:14 / claus"
  3752     "Modified: / 13-09-1995 / 12:56:14 / claus"
  3756     "Modified: / 27-07-2011 / 15:36:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
  3753     "Modified: / 27-07-2011 / 15:36:53 / Jan Vrany <jan.vrany@fit.cvut.cz>"
  3757     "Modified: / 12-02-2017 / 11:27:59 / cg"
  3754     "Modified: / 12-02-2017 / 11:27:59 / cg"
       
  3755     "Modified: / 08-06-2019 / 14:59:14 / Claus Gittinger"
  3758 !
  3756 !
  3759 
  3757 
  3760 nextToken:aCharacter
  3758 nextToken:aCharacter
  3761     "return a character token"
  3759     "return a character token"
  3762 
  3760