Scanner.st
changeset 4426 49123df91ffa
parent 4422 a06688034985
child 4432 fef0c840421e
equal deleted inserted replaced
4425:87133d993838 4426:49123df91ffa
     1 "{ Encoding: utf8 }"
       
     2 
       
     3 "
     1 "
     4  COPYRIGHT (c) 1989 by Claus Gittinger
     2  COPYRIGHT (c) 1989 by Claus Gittinger
     5 	      All Rights Reserved
     3 	      All Rights Reserved
     6 
     4 
     7  This software is furnished under a license and may be used
     5  This software is furnished under a license and may be used
   143 !
   141 !
   144 
   142 
   145 extendedBinarySelectorCharacters
   143 extendedBinarySelectorCharacters
   146     "return a collection of characters which are optionally allowed in binary selectors"
   144     "return a collection of characters which are optionally allowed in binary selectors"
   147 
   145 
   148     "/ ^ '±×·÷«»'.
   146     "/ ^ '±×·÷«»'.
   149     ^ String
   147     ^ String
   150 	with:(Character value:16rB1)  "/ plus-minus
   148 	with:(Character value:16rB1)  "/ plus-minus
   151 	with:(Character value:16rD7)  "/ times
   149 	with:(Character value:16rD7)  "/ times
   152 	with:(Character value:16rB7)  "/ centered dot
   150 	with:(Character value:16rB7)  "/ centered dot
   153 	with:(Character value:16rF7)  "/ divide
   151 	with:(Character value:16rF7)  "/ divide
  1524 invalidCharacter:ch
  1522 invalidCharacter:ch
  1525     |errMsg v|
  1523     |errMsg v|
  1526 
  1524 
  1527     v := ch codePoint.
  1525     v := ch codePoint.
  1528     ch isPrintable ifTrue:[
  1526     ch isPrintable ifTrue:[
  1529 	errMsg := 'Invalid character: ''' , ch asString , ''' ', '(' , (v radixPrintStringRadix:16) , ').'.
  1527         errMsg := 'Invalid character: ''' , ch asString , ''' ', '(' , (v radixPrintStringRadix:16) , ').'.
  1530     ] ifFalse:[
  1528     ] ifFalse:[
  1531 	errMsg := 'Invalid character: ' , (v radixPrintStringRadix:16) , '.'.
  1529         errMsg := 'Invalid character: ' , (v radixPrintStringRadix:16) , '.'.
  1532     ].
  1530     ].
  1533     v > 16r7F ifTrue:[
  1531     v > 16r7F ifTrue:[
  1534 	errMsg := errMsg , '
  1532         errMsg := errMsg , '
  1535 
  1533 
  1536 Notice:
  1534 Notice:
  1537   Only 7-bit ascii allowed (for compatibility with other Smalltalk dialects).
  1535   Only 7-bit ascii allowed (for compatibility with other Smalltalk dialects).
  1538   You can enable some of the special characters via the compiler-settings dialog.'.
  1536   You can enable some of the special characters via the compiler-settings dialog.'.
  1539     ].
  1537     ].
  1856 
  1854 
  1857     "Modified: 23.5.1997 / 12:16:48 / cg"
  1855     "Modified: 23.5.1997 / 12:16:48 / cg"
  1858 !
  1856 !
  1859 
  1857 
  1860 warnParagraphAt:position
  1858 warnParagraphAt:position
  1861     "warn about §-character in an identifier"
  1859     "warn about §-character in an identifier"
  1862 
  1860 
  1863     ignoreWarnings ifFalse:[
  1861     ignoreWarnings ifFalse:[
  1864 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1862 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1865 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1863 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1866 		self
  1864 		self
  1867 		    warning:'§-characters in identifiers/symbols are nonportable'
  1865 		    warning:'§-characters in identifiers/symbols are nonportable'
  1868 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1866 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1869 		    position:position to:position.
  1867 		    position:position to:position.
  1870 		"
  1868 		"
  1871 		 only warn once (per method)
  1869 		 only warn once (per method)
  1872 		"
  1870 		"
  2739     ^ self invalidCharacter:source peek.
  2737     ^ self invalidCharacter:source peek.
  2740 !
  2738 !
  2741 
  2739 
  2742 nextHash
  2740 nextHash
  2743     "a # has been read - return either
  2741     "a # has been read - return either
  2744 	a symbol,
  2742         a symbol,
  2745 	HashLeftParen     (for '#('),
  2743         HashLeftParen     (for '#('),
  2746 	HashLeftBrack     (for '#['),
  2744         HashLeftBrack     (for '#['),
  2747 	HashLeftBrace     (for '#{'  and AllowQualifiedNames)
  2745         HashLeftBrace     (for '#{'  and AllowQualifiedNames)
  2748 	HashHashLeftParen (for '##(' and AllowDolphinExtensions)
  2746         HashHashLeftParen (for '##(' and AllowDolphinExtensions)
  2749 	HashHashLeftBrack (for '##[' )
  2747         HashHashLeftBrack (for '##[' )
  2750 	HashHash          (for '##' )
  2748         HashHash          (for '##' )
  2751 
  2749 
  2752      extended syntax (scheme-style literal arrays):
  2750      extended syntax (scheme-style literal arrays):
  2753 	HashTypedArrayParen   (for '#u8(', '#s8(' , '#u16(' ...)
  2751      (requires ParserFlags allowSTXExtendedArrayLiterals:true)
  2754 	type in tokenValue: u1, u8, u16, u32, u64, s8, s16, s32, s64,
  2752         HashTypedArrayParen   (for '#u8(', '#s8(' , '#u16(' ...)
  2755 			    f16, f32, f64, f, d, b, B
  2753         type in tokenValue: u1, u8, u16, u32, u64, s8, s16, s32, s64,
       
  2754                             f16, f32, f64, f, d, b, B
  2756     "
  2755     "
  2757 
  2756 
  2758     |nextChar string allowUnderscoreInIdentifier|
  2757     |nextChar string allowUnderscoreInIdentifier|
  2759 
  2758 
  2760     allowUnderscoreInIdentifier := parserFlags allowUnderscoreInIdentifier.
  2759     allowUnderscoreInIdentifier := parserFlags allowUnderscoreInIdentifier.
  2761 
  2760 
  2762     nextChar := source nextPeek.
  2761     nextChar := source nextPeek.
  2763     nextChar notNil ifTrue:[
  2762     nextChar notNil ifTrue:[
  2764 	(nextChar == $( ) ifTrue:[
  2763         (nextChar == $( ) ifTrue:[
  2765 	    source next.
  2764             source next.
  2766 	    token := '#('.
  2765             token := '#('.
  2767 	    tokenType := #HashLeftParen.
  2766             tokenType := #HashLeftParen.
  2768 	    ^ tokenType
  2767             ^ tokenType
  2769 	].
  2768         ].
  2770 
  2769 
  2771 	(nextChar == $[ ) ifTrue:[
  2770         (nextChar == $[ ) ifTrue:[
  2772 	    "ST-80 & ST/X support Constant ByteArrays as #[...]"
  2771             "ST-80 & ST/X support Constant ByteArrays as #[...]
  2773 	    source next.
  2772              now all Smalltalk dialects do."
  2774 	    token := '#['.
  2773             source next.
  2775 	    tokenType := #HashLeftBrack.
  2774             token := '#['.
  2776 	    ^ tokenType
  2775             tokenType := #HashLeftBrack.
  2777 	].
  2776             ^ tokenType
  2778 
  2777         ].
  2779 	(nextChar == ${ ) ifTrue:[
  2778 
  2780 	    " #{ ... } is one of:
  2779         (nextChar == ${ ) ifTrue:[
  2781 		#{ Foo.Bar.Baz }            VW3 and later qualified name
  2780             " #{ ... } is one of:
  2782 		#{ xx-xx-xx-xx-...-xx }     StAgents UUID
  2781                 #{ Foo.Bar.Baz }            VW3 and later qualified name
  2783 		#{ URL }                    url object qualifier
  2782                 #{ xx-xx-xx-xx-...-xx }     StAgents UUID
  2784 	    "
  2783                 #{ URL }                    url object qualifier
  2785 	    source next.
  2784                 #{ key: value ... }         inline literal object
  2786 	    token := '#{'.
  2785             "
  2787 	    tokenType := #HashLeftBrace.
  2786             source next.
  2788 	    ^ tokenType
  2787             token := '#{'.
  2789 	].
  2788             tokenType := #HashLeftBrace.
  2790 
  2789             ^ tokenType
  2791 	(nextChar == $' ) ifTrue:[
  2790         ].
  2792 	    "ST-80 and ST/X support arbitrary symbols as #'...'"
  2791 
  2793 	    self nextString:nextChar.
  2792         (nextChar == $' ) ifTrue:[
  2794 	    self markSymbolFrom:tokenPosition to:(source position).
  2793             "ST-80 and ST/X support arbitrary symbols as #'...'
  2795 	    tokenType == #EOF ifFalse:[
  2794              now all dialects do"
  2796 		tokenValue isWideString ifTrue:[
  2795             self nextString:nextChar.
  2797 		    self syntaxError:'symbols which require 2-byte characters are not (yet) allowed'
  2796             self markSymbolFrom:tokenPosition to:(source position).
  2798 			    position:tokenPosition to:(source position).
  2797             tokenType == #EOF ifFalse:[
  2799 		].
  2798                 tokenValue isWideString ifTrue:[
  2800 		tokenValue := token := tokenValue asSymbol.
  2799                     self syntaxError:'symbols which require 2-byte characters are not (yet) allowed'
  2801 		tokenType := #Symbol.
  2800                             position:tokenPosition to:(source position).
  2802 	    ].
  2801                 ].
  2803 	    ^ tokenType
  2802                 tokenValue := token := tokenValue asSymbol.
  2804 	].
  2803                 tokenType := #Symbol.
  2805 
  2804             ].
  2806 	(nextChar == $#) ifTrue:[
  2805             ^ tokenType
  2807 	    nextChar := source nextPeek.
  2806         ].
  2808 	    nextChar == $( ifTrue:[
  2807 
  2809 		parserFlags allowDolphinExtensions == true ifTrue:[
  2808         (nextChar == $#) ifTrue:[
  2810 		    "dolphin does computed literals as ##( ... )"
  2809             nextChar := source nextPeek.
  2811 		    source next.
  2810             nextChar == $( ifTrue:[
  2812 		    token := '##('.
  2811                 parserFlags allowDolphinExtensions == true ifTrue:[
  2813 		    tokenType := #HashHashLeftParen.
  2812                     "dolphin does computed literals as ##( expression )"
  2814 		    ^ tokenType
  2813                     source next.
  2815 		].
  2814                     token := '##('.
  2816 	    ].
  2815                     tokenType := #HashHashLeftParen.
  2817 
  2816                     ^ tokenType
  2818 	    nextChar == $[ ifTrue:[
  2817                 ].
  2819 		source next.
  2818             ].
  2820 		token := '##['.
  2819 
  2821 		tokenType := #HashHashLeftBrack.
  2820             nextChar == $[ ifTrue:[
  2822 		^ tokenType
  2821                 source next.
  2823 	    ].
  2822                 token := '##['.
  2824 
  2823                 tokenType := #HashHashLeftBrack.
  2825 	    parserFlags allowVisualAgeESSymbolLiterals == true ifTrue:[
  2824                 ^ tokenType
  2826 		(self nextSymbolAfterHash) notNil ifTrue:[
  2825             ].
  2827 		    tokenType := #ESSymbol.
  2826 
  2828 		    ^ #ESSymbol
  2827             parserFlags allowVisualAgeESSymbolLiterals == true ifTrue:[
  2829 		].
  2828                 "V'age has special ESsymbols as ##name or ##'name'"
  2830 		(nextChar == $') ifTrue:[
  2829                 (self nextSymbolAfterHash) notNil ifTrue:[
  2831 		    source next.
  2830                     tokenType := #ESSymbol.
  2832 		    self nextString:nextChar.
  2831                     ^ #ESSymbol
  2833 		    tokenType := #ESSymbol.
  2832                 ].
  2834 		    ^ #ESSymbol
  2833                 (nextChar == $') ifTrue:[
  2835 		].
  2834                     source next.
  2836 	    ].
  2835                     self nextString:nextChar.
  2837 
  2836                     tokenType := #ESSymbol.
  2838 	    token := '##'.
  2837                     ^ #ESSymbol
  2839 	    tokenType := #HashHash.
  2838                 ].
  2840 	    ^ tokenType
  2839             ].
  2841 	].
  2840 
  2842 
  2841             token := '##'.
  2843 	parserFlags allowSTXExtendedArrayLiterals ifTrue:[
  2842             tokenType := #HashHash.
  2844 	    "/ scheme-style typed literal array extension
  2843             ^ tokenType
  2845 	    ('usfdbB' includes:nextChar) ifTrue:[
  2844         ].
  2846 		|prefix|
  2845 
  2847 
  2846         "/ scheme-style typed literal arrays:
  2848 		prefix := String with:nextChar.
  2847         "/    #uXX( ... )  XX = { 1, 8, 16, 32, 64 } - bit, uint8, uint16, uint32 or uint64 array
  2849 		nextChar := source nextPeek.
   2848         "/    #sXX( ... )  XX = { 8, 16, 32, 64 }    - int8, int16, int32 or int64 array
  2850 		[nextChar notNil and:[nextChar isDigit]] whileTrue:[
  2849         "/    #fXX( ... )  XX = { 16, 32, 64 }       - IEEE half, single or double array
  2851 		    prefix := prefix copyWith:nextChar.
  2850         "/    #f( ... ) - IEEE single float array 
  2852 		    nextChar := source nextPeek.
  2851         "/    #d( ... ) - IEEE double array  
  2853 		].
  2852         "/    #b( ... ) - bit array
  2854 		nextChar == $( ifTrue:[
  2853         "/    #B( ... ) - boolean array  
  2855 		    source next.
  2854         ('usfdbB' includes:nextChar) ifTrue:[
  2856 		    (
  2855             |prefix|
  2857 			#( 'f' 'd' 'b' 'B'
  2856 
  2858 			   'u1' 'u8' 'u16' 'u32' 'u64'
   2857             "/ collect until we know what we get...
  2859 			   's8' 's16' 's32' 's64'
  2858             prefix := String with:nextChar.
  2860 			   'f16' 'f32' 'f64'
  2859             nextChar := source nextPeek.
  2861 			) includes:prefix
  2860             [nextChar notNil and:[nextChar isDigit]] whileTrue:[
  2862 		    ) ifTrue:[
  2861                 prefix := prefix copyWith:nextChar.
  2863 			tokenType := #HashTypedArrayParen.
  2862                 nextChar := source nextPeek.
  2864 			tokenValue := prefix asSymbol.
  2863             ].
  2865 			^ tokenType
  2864             nextChar == $( ifTrue:[
  2866 		    ].
  2865                 parserFlags allowSTXExtendedArrayLiterals ifFalse:[
  2867 		    self parseError:'unsupported literal array type: ',prefix.
  2866                     self parseError:c'Non-Standard ST/X extension used: #XXX( .. ) unboxed array literal.\nPlease enable "allowSTXExtendedArrayLiterals" in the ParserFlags\n\nNotice: this is currently not supported by stc' 
  2868 		    tokenType := #HashLeftParen.
  2867                          position:tokenPosition to:source position
  2869 		    ^ #HashLeftParen
  2868                 ].    
  2870 		].
  2869                 source next.
  2871 		^ self nextSymbolAfterHash:prefix.
  2870                 (
  2872 	    ].
  2871                     #( 'f' 'd' 'b' 'B'
  2873 	].
  2872                        'u1' 'u8' 'u16' 'u32' 'u64'
  2874 
  2873                        's8' 's16' 's32' 's64'
  2875 	(self nextSymbolAfterHash) notNil ifTrue:[
  2874                        'f16' 'f32' 'f64'
  2876 	    ^ #Symbol
  2875                     ) includes:prefix
  2877 	].
  2876                 ) ifTrue:[
  2878 
  2877                     tokenType := #HashTypedArrayParen.
  2879 	(self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
  2878                     tokenValue := prefix asSymbol.
  2880 	    string := source next asString.
  2879                     ^ tokenType
  2881 	    nextChar := source peek.
  2880                 ].
  2882 	    nextChar notNil ifTrue:[
  2881                 self parseError:'unsupported literal array type: ',prefix.
  2883 		(self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
  2882                 tokenType := #HashLeftParen.
  2884 		    source next.
  2883                 ^ #HashLeftParen
  2885 		    string := string copyWith:nextChar
  2884             ].
  2886 		]
  2885             ^ self nextSymbolAfterHash:prefix.
  2887 	    ].
  2886         ].
  2888 	    self markSymbolFrom:tokenPosition to:(source position).
  2887 
  2889 	    tokenValue := token := string asSymbol.
  2888         (self nextSymbolAfterHash) notNil ifTrue:[
  2890 	    tokenType := #Symbol.
  2889             ^ #Symbol
  2891 	    ^ tokenType
  2890         ].
  2892 	]
  2891 
       
  2892         (self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
       
  2893             string := source next asString.
       
  2894             nextChar := source peek.
       
  2895             nextChar notNil ifTrue:[
       
  2896                 (self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
       
  2897                     source next.
       
  2898                     string := string copyWith:nextChar
       
  2899                 ]
       
  2900             ].
       
  2901             self markSymbolFrom:tokenPosition to:(source position).
       
  2902             tokenValue := token := string asSymbol.
       
  2903             tokenType := #Symbol.
       
  2904             ^ tokenType
       
  2905         ]
  2893     ].
  2906     ].
  2894 
  2907 
  2895     "this allows hash to be used as binop -
  2908     "this allows hash to be used as binop -
  2896      I don't know, if this is correct ..."
  2909      I don't know, if this is correct ..."
  2897     tokenName := token := '#'.
  2910     tokenName := token := '#'.
  2898     tokenType := #BinaryOperator.
  2911     tokenType := #BinaryOperator.
  2899     ^ tokenType
  2912     ^ tokenType
  2900 
  2913 
  2901     "Modified: / 01-08-2006 / 14:57:19 / cg"
  2914     "Modified: / 01-08-2006 / 14:57:19 / cg"
  2902     "Modified (format): / 30-09-2011 / 12:23:04 / cg"
  2915     "Modified (format): / 30-09-2011 / 12:23:04 / cg"
       
  2916     "Modified: / 30-05-2019 / 19:06:36 / Claus Gittinger"
  2903 !
  2917 !
  2904 
  2918 
  2905 nextId
  2919 nextId
  2906     "no longer used here - remains for backwardCompatibility for
  2920     "no longer used here - remains for backwardCompatibility for
  2907      subclass users ... (sigh)"
  2921      subclass users ... (sigh)"
  2989         ^ self xnextString:$' escapeStyle:string
  3003         ^ self xnextString:$' escapeStyle:string
  2990     ].    
  3004     ].    
  2991 
  3005 
  2992     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3006     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  2993     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  3007     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  2994     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  3008     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  2995     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  3009     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  2996         pos := source position + 1.
  3010         pos := source position + 1.
  2997         nextChar == $_ ifTrue:[
  3011         nextChar == $_ ifTrue:[
  2998             self warnUnderscoreAt:pos.
  3012             self warnUnderscoreAt:pos.
  2999         ] ifFalse:[
  3013         ] ifFalse:[
  3000             nextChar == $$ ifTrue:[
  3014             nextChar == $$ ifTrue:[
  3001                 self warnDollarAt:pos.
  3015                 self warnDollarAt:pos.
  3002             ] ifFalse:[
  3016             ] ifFalse:[
  3003                 nextChar == $§ ifTrue:[
  3017                 nextChar == $§ ifTrue:[
  3004                     self warnParagraphAt:pos.
  3018                     self warnParagraphAt:pos.
  3005                 ] ifFalse:[
  3019                 ] ifFalse:[
  3006                     "/ self warnNationalCharacterAt:pos.
  3020                     "/ self warnNationalCharacterAt:pos.
  3007                 ]
  3021                 ]
  3008             ]
  3022             ]
  3018                     string := string , source nextAlphaNumericWord.
  3032                     string := string , source nextAlphaNumericWord.
  3019                     nextChar := source peekOrNil.
  3033                     nextChar := source peekOrNil.
  3020                 ].
  3034                 ].
  3021                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3035                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3022                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3036                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3023                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3037                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3024                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3038                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3025             ]
  3039             ]
  3026         ].
  3040         ].
  3027     ].
  3041     ].
  3028 
  3042 
  3713 		tok notNil ifTrue:[
  3727 		tok notNil ifTrue:[
  3714 		    ^ tok
  3728 		    ^ tok
  3715 		].
  3729 		].
  3716 		"/ a nil token means: continue reading
  3730 		"/ a nil token means: continue reading
  3717 	    ] ifFalse:[
  3731 	    ] ifFalse:[
  3718 		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
  3732 		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
  3719 		    tok := self nextIdentifier.
  3733 		    tok := self nextIdentifier.
  3720 		    tok notNil ifTrue:[
  3734 		    tok notNil ifTrue:[
  3721 			^ tok
  3735 			^ tok
  3722 		    ].
  3736 		    ].
  3723 		    "/ a nil token means: continue reading
  3737 		    "/ a nil token means: continue reading