Scanner.st
changeset 4426 49123df91ffa
parent 4422 a06688034985
child 4432 fef0c840421e
--- a/Scanner.st	Thu May 30 12:03:06 2019 +0200
+++ b/Scanner.st	Thu May 30 19:07:34 2019 +0200
@@ -1,5 +1,3 @@
-"{ Encoding: utf8 }"
-
 "
  COPYRIGHT (c) 1989 by Claus Gittinger
 	      All Rights Reserved
@@ -145,7 +143,7 @@
 extendedBinarySelectorCharacters
     "return a collection of characters which are optionally allowed in binary selectors"
 
-    "/ ^ '±×·÷«»'.
+    "/ ^ '±×·÷«»'.
     ^ String
 	with:(Character value:16rB1)  "/ plus-minus
 	with:(Character value:16rD7)  "/ times
@@ -1526,12 +1524,12 @@
 
     v := ch codePoint.
     ch isPrintable ifTrue:[
-	errMsg := 'Invalid character: ''' , ch asString , ''' ', '(' , (v radixPrintStringRadix:16) , ').'.
+        errMsg := 'Invalid character: ''' , ch asString , ''' ', '(' , (v radixPrintStringRadix:16) , ').'.
     ] ifFalse:[
-	errMsg := 'Invalid character: ' , (v radixPrintStringRadix:16) , '.'.
+        errMsg := 'Invalid character: ' , (v radixPrintStringRadix:16) , '.'.
     ].
     v > 16r7F ifTrue:[
-	errMsg := errMsg , '
+        errMsg := errMsg , '
 
 Notice:
   Only 7-bit ascii allowed (for compatibility with other Smalltalk dialects).
@@ -1858,13 +1856,13 @@
 !
 
 warnParagraphAt:position
-    "warn about §-character in an identifier"
+    "warn about §-character in an identifier"
 
     ignoreWarnings ifFalse:[
 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
 	    parserFlags warnParagraphInIdentifier ifTrue:[
 		self
-		    warning:'§-characters in identifiers/symbols are nonportable'
+		    warning:'§-characters in identifiers/symbols are nonportable'
 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
 		    position:position to:position.
 		"
@@ -2741,18 +2739,19 @@
 
 nextHash
     "a # has been read - return either
-	a symbol,
-	HashLeftParen     (for '#('),
-	HashLeftBrack     (for '#['),
-	HashLeftBrace     (for '#{'  and AllowQualifiedNames)
-	HashHashLeftParen (for '##(' and AllowDolphinExtensions)
-	HashHashLeftBrack (for '##[' )
-	HashHash          (for '##' )
+        a symbol,
+        HashLeftParen     (for '#('),
+        HashLeftBrack     (for '#['),
+        HashLeftBrace     (for '#{'  and AllowQualifiedNames)
+        HashHashLeftParen (for '##(' and AllowDolphinExtensions)
+        HashHashLeftBrack (for '##[' )
+        HashHash          (for '##' )
 
      extended syntax (scheme-style literal arrays):
-	HashTypedArrayParen   (for '#u8(', '#s8(' , '#u16(' ...)
-	type in tokenValue: u1, u8, u16, u32, u64, s8, s16, s32, s64,
-			    f16, f32, f64, f, d, b, B
+     (requires ParserFlags allowSTXExtendedArrayLiterals:true)
+        HashTypedArrayParen   (for '#u8(', '#s8(' , '#u16(' ...)
+        type in tokenValue: u1, u8, u16, u32, u64, s8, s16, s32, s64,
+                            f16, f32, f64, f, d, b, B
     "
 
     |nextChar string allowUnderscoreInIdentifier|
@@ -2761,135 +2760,149 @@
 
     nextChar := source nextPeek.
     nextChar notNil ifTrue:[
-	(nextChar == $( ) ifTrue:[
-	    source next.
-	    token := '#('.
-	    tokenType := #HashLeftParen.
-	    ^ tokenType
-	].
-
-	(nextChar == $[ ) ifTrue:[
-	    "ST-80 & ST/X support Constant ByteArrays as #[...]"
-	    source next.
-	    token := '#['.
-	    tokenType := #HashLeftBrack.
-	    ^ tokenType
-	].
-
-	(nextChar == ${ ) ifTrue:[
-	    " #{ ... } is one of:
-		#{ Foo.Bar.Baz }            VW3 and later qualified name
-		#{ xx-xx-xx-xx-...-xx }     StAgents UUID
-		#{ URL }                    url object qualifier
-	    "
-	    source next.
-	    token := '#{'.
-	    tokenType := #HashLeftBrace.
-	    ^ tokenType
-	].
-
-	(nextChar == $' ) ifTrue:[
-	    "ST-80 and ST/X support arbitrary symbols as #'...'"
-	    self nextString:nextChar.
-	    self markSymbolFrom:tokenPosition to:(source position).
-	    tokenType == #EOF ifFalse:[
-		tokenValue isWideString ifTrue:[
-		    self syntaxError:'symbols which require 2-byte characters are not (yet) allowed'
-			    position:tokenPosition to:(source position).
-		].
-		tokenValue := token := tokenValue asSymbol.
-		tokenType := #Symbol.
-	    ].
-	    ^ tokenType
-	].
-
-	(nextChar == $#) ifTrue:[
-	    nextChar := source nextPeek.
-	    nextChar == $( ifTrue:[
-		parserFlags allowDolphinExtensions == true ifTrue:[
-		    "dolphin does computed literals as ##( ... )"
-		    source next.
-		    token := '##('.
-		    tokenType := #HashHashLeftParen.
-		    ^ tokenType
-		].
-	    ].
-
-	    nextChar == $[ ifTrue:[
-		source next.
-		token := '##['.
-		tokenType := #HashHashLeftBrack.
-		^ tokenType
-	    ].
-
-	    parserFlags allowVisualAgeESSymbolLiterals == true ifTrue:[
-		(self nextSymbolAfterHash) notNil ifTrue:[
-		    tokenType := #ESSymbol.
-		    ^ #ESSymbol
-		].
-		(nextChar == $') ifTrue:[
-		    source next.
-		    self nextString:nextChar.
-		    tokenType := #ESSymbol.
-		    ^ #ESSymbol
-		].
-	    ].
-
-	    token := '##'.
-	    tokenType := #HashHash.
-	    ^ tokenType
-	].
-
-	parserFlags allowSTXExtendedArrayLiterals ifTrue:[
-	    "/ scheme-style typed literal array extension
-	    ('usfdbB' includes:nextChar) ifTrue:[
-		|prefix|
-
-		prefix := String with:nextChar.
-		nextChar := source nextPeek.
-		[nextChar notNil and:[nextChar isDigit]] whileTrue:[
-		    prefix := prefix copyWith:nextChar.
-		    nextChar := source nextPeek.
-		].
-		nextChar == $( ifTrue:[
-		    source next.
-		    (
-			#( 'f' 'd' 'b' 'B'
-			   'u1' 'u8' 'u16' 'u32' 'u64'
-			   's8' 's16' 's32' 's64'
-			   'f16' 'f32' 'f64'
-			) includes:prefix
-		    ) ifTrue:[
-			tokenType := #HashTypedArrayParen.
-			tokenValue := prefix asSymbol.
-			^ tokenType
-		    ].
-		    self parseError:'unsupported literal array type: ',prefix.
-		    tokenType := #HashLeftParen.
-		    ^ #HashLeftParen
-		].
-		^ self nextSymbolAfterHash:prefix.
-	    ].
-	].
-
-	(self nextSymbolAfterHash) notNil ifTrue:[
-	    ^ #Symbol
-	].
-
-	(self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
-	    string := source next asString.
-	    nextChar := source peek.
-	    nextChar notNil ifTrue:[
-		(self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
-		    source next.
-		    string := string copyWith:nextChar
-		]
-	    ].
-	    self markSymbolFrom:tokenPosition to:(source position).
-	    tokenValue := token := string asSymbol.
-	    tokenType := #Symbol.
-	    ^ tokenType
-	]
+        (nextChar == $( ) ifTrue:[
+            source next.
+            token := '#('.
+            tokenType := #HashLeftParen.
+            ^ tokenType
+        ].
+
+        (nextChar == $[ ) ifTrue:[
+            "ST-80 & ST/X support Constant ByteArrays as #[...]
+             now all Smalltalk dialects do."
+            source next.
+            token := '#['.
+            tokenType := #HashLeftBrack.
+            ^ tokenType
+        ].
+
+        (nextChar == ${ ) ifTrue:[
+            " #{ ... } is one of:
+                #{ Foo.Bar.Baz }            VW3 and later qualified name
+                #{ xx-xx-xx-xx-...-xx }     StAgents UUID
+                #{ URL }                    url object qualifier
+                #{ key: value ... }         inline literal object
+            "
+            source next.
+            token := '#{'.
+            tokenType := #HashLeftBrace.
+            ^ tokenType
+        ].
+
+        (nextChar == $' ) ifTrue:[
+            "ST-80 and ST/X support arbitrary symbols as #'...'
+             now all dialects do"
+            self nextString:nextChar.
+            self markSymbolFrom:tokenPosition to:(source position).
+            tokenType == #EOF ifFalse:[
+                tokenValue isWideString ifTrue:[
+                    self syntaxError:'symbols which require 2-byte characters are not (yet) allowed'
+                            position:tokenPosition to:(source position).
+                ].
+                tokenValue := token := tokenValue asSymbol.
+                tokenType := #Symbol.
+            ].
+            ^ tokenType
+        ].
+
+        (nextChar == $#) ifTrue:[
+            nextChar := source nextPeek.
+            nextChar == $( ifTrue:[
+                parserFlags allowDolphinExtensions == true ifTrue:[
+                    "dolphin does computed literals as ##( expression )"
+                    source next.
+                    token := '##('.
+                    tokenType := #HashHashLeftParen.
+                    ^ tokenType
+                ].
+            ].
+
+            nextChar == $[ ifTrue:[
+                source next.
+                token := '##['.
+                tokenType := #HashHashLeftBrack.
+                ^ tokenType
+            ].
+
+            parserFlags allowVisualAgeESSymbolLiterals == true ifTrue:[
+                "V'age has special ESsymbols as ##name or ##'name'"
+                (self nextSymbolAfterHash) notNil ifTrue:[
+                    tokenType := #ESSymbol.
+                    ^ #ESSymbol
+                ].
+                (nextChar == $') ifTrue:[
+                    source next.
+                    self nextString:nextChar.
+                    tokenType := #ESSymbol.
+                    ^ #ESSymbol
+                ].
+            ].
+
+            token := '##'.
+            tokenType := #HashHash.
+            ^ tokenType
+        ].
+
+        "/ scheme-style typed literal arrays:
+        "/    #uXX( ... )  XX = { 1, 8, 16, 32, 64 } - bit, uint8, uint16, uint32 or uint64 array
+        "/    #iXX( ... )  XX = { 8, 16, 32, 64 }    - int8, int16, int32 or int64 array
+        "/    #fXX( ... )  XX = { 16, 32, 64 }       - IEEE half, single or double array
+        "/    #f( ... ) - IEEE single float array 
+        "/    #d( ... ) - IEEE double array  
+        "/    #b( ... ) - bit array
+        "/    #B( ... ) - boolean array  
+        ('usfdbB' includes:nextChar) ifTrue:[
+            |prefix|
+
+            "/ collec tuntil we know what we get...
+            prefix := String with:nextChar.
+            nextChar := source nextPeek.
+            [nextChar notNil and:[nextChar isDigit]] whileTrue:[
+                prefix := prefix copyWith:nextChar.
+                nextChar := source nextPeek.
+            ].
+            nextChar == $( ifTrue:[
+                parserFlags allowSTXExtendedArrayLiterals ifFalse:[
+                    self parseError:c'Non-Standard ST/X extension used: #XXX( .. ) unboxed array literal.\nPlease enable "allowSTXExtendedArrayLiterals" in the ParserFlags\n\nNotice: this is currently not supported by stc' 
+                         position:tokenPosition to:source position
+                ].    
+                source next.
+                (
+                    #( 'f' 'd' 'b' 'B'
+                       'u1' 'u8' 'u16' 'u32' 'u64'
+                       's8' 's16' 's32' 's64'
+                       'f16' 'f32' 'f64'
+                    ) includes:prefix
+                ) ifTrue:[
+                    tokenType := #HashTypedArrayParen.
+                    tokenValue := prefix asSymbol.
+                    ^ tokenType
+                ].
+                self parseError:'unsupported literal array type: ',prefix.
+                tokenType := #HashLeftParen.
+                ^ #HashLeftParen
+            ].
+            ^ self nextSymbolAfterHash:prefix.
+        ].
+
+        (self nextSymbolAfterHash) notNil ifTrue:[
+            ^ #Symbol
+        ].
+
+        (self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
+            string := source next asString.
+            nextChar := source peek.
+            nextChar notNil ifTrue:[
+                (self isSpecialOrExtendedSpecialCharacter:nextChar) ifTrue:[
+                    source next.
+                    string := string copyWith:nextChar
+                ]
+            ].
+            self markSymbolFrom:tokenPosition to:(source position).
+            tokenValue := token := string asSymbol.
+            tokenType := #Symbol.
+            ^ tokenType
+        ]
     ].
 
     "this allows hash to be used as binop -
@@ -2900,6 +2913,7 @@
 
     "Modified: / 01-08-2006 / 14:57:19 / cg"
     "Modified (format): / 30-09-2011 / 12:23:04 / cg"
+    "Modified: / 30-05-2019 / 19:06:36 / Claus Gittinger"
 !
 
 nextId
@@ -2991,7 +3005,7 @@
 
     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
-    or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
+    or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
         pos := source position + 1.
         nextChar == $_ ifTrue:[
@@ -3000,7 +3014,7 @@
             nextChar == $$ ifTrue:[
                 self warnDollarAt:pos.
             ] ifFalse:[
-                nextChar == $§ ifTrue:[
+                nextChar == $§ ifTrue:[
                     self warnParagraphAt:pos.
                 ] ifFalse:[
                     "/ self warnNationalCharacterAt:pos.
@@ -3020,7 +3034,7 @@
                 ].
                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
-                      or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
+                      or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
             ]
         ].
@@ -3715,7 +3729,7 @@
 		].
 		"/ a nil token means: continue reading
 	    ] ifFalse:[
-		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
+		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
 		    tok := self nextIdentifier.
 		    tok notNil ifTrue:[
 			^ tok