#FEATURE by cg
authorClaus Gittinger <cg@exept.de>
Fri, 08 Feb 2019 21:59:45 +0100
changeset 4334 96211fc814c7
parent 4333 06e11aae91bd
child 4335 d58aa3c7751c
#FEATURE by cg class: Scanner c-strings c'...' where ... is escaped by C-like rules added: #characterNamed: #nextString:escapeStyle: comment/format in: #nextString: changed: #escapeCharacterFor: #nextIdentifier class: Scanner class changed: #setupActions
Scanner.st
--- a/Scanner.st	Fri Feb 08 18:45:32 2019 +0100
+++ b/Scanner.st	Fri Feb 08 21:59:45 2019 +0100
@@ -25,8 +25,8 @@
 		didWarnAboutUnderscoreInIdentifier didWarnAboutOldStyleAssignment
 		didWarnAboutDollarInIdentifier didWarnAboutPeriodInSymbol
 		unicodeActions'
-	classVariableNames:'DefaultTypeArray DefaultActionArray EmptySourceNotificationSignal
-		DefaultUnicodeActions'
+	classVariableNames:'DefaultActionArray DefaultTypeArray DefaultUnicodeActions
+		EmptySourceNotificationSignal'
 	poolDictionaries:''
 	category:'System-Compiler'
 !
@@ -220,7 +220,7 @@
     "kludge: action is nextColonOrAssign, but type is special"
     typeArray at:($: codePoint) put:#special.
 
-    actionArray at:($' codePoint) put:[:s :char | s nextString:char].
+    actionArray at:($' codePoint) put:[:s :char | s nextString:char escapeStyle:nil].
     actionArray at:($$ codePoint) put:[:s :char | s nextCharacter].
     actionArray at:($# codePoint) put:[:s :char | s nextHash].
     actionArray at:($!! codePoint) put:[:s :char | s nextExcla].
@@ -242,7 +242,7 @@
     "
 
     "Modified: / 02-07-2017 / 01:11:27 / cg"
-    "Modified: / 07-08-2018 / 07:47:38 / Claus Gittinger"
+    "Modified: / 08-02-2019 / 19:13:32 / Claus Gittinger"
 ! !
 
 !Scanner class methodsFor:'instance creation'!
@@ -2271,6 +2271,23 @@
     ^ self
 !
 
+characterNamed:name
+    |idx|
+
+    idx := #(
+             'nul' 'soh' 'stx' 'etx' 'eot' 'enq' 'ack' 'bel'
+             'bs'  'ht'  'lf'  'vt'  'ff'  'cr'  'so'  'si'
+             'dle' 'dc1' 'dc2' 'dc3' 'dc4' 'nak' 'syn' 'etb'
+             'can' 'em'  'sub' 'esc' 'fs'  'gs'  'rs'  'us' ) indexOf:name.
+    idx == 0 ifTrue:[
+        name = 'tab' ifTrue:[ ^ Character tab ].
+        ^ nil
+    ].
+    ^ Character value:idx-1
+
+    "Created: / 08-02-2019 / 19:12:23 / Claus Gittinger"
+!
+
 checkForKeyword:string
     "check if string is a keyword (as opposed to an identifier).
      That is, its one of 'self', 'super', 'nil', 'true', 'false',
@@ -2359,20 +2376,23 @@
 
      much like character escapes in C-literals;
      expands:
-	\n      newLine
-	\r      return
-	\t      tab
-	\b      backspace
-	\f      formfeed
-	\g      bell
-
-	\\      backSlash
-	\ ...\  (backslash-separator) ignored up to next backslash
-	\xNN    hexCharacter
-	\uNNNN  hex UnicodeCharacter
+        \n      newLine
+        \r      return
+        \t      tab
+        \b      backspace
+        \f      formfeed
+        \g      bell
+        \0      nul
+
+        \\        backSlash
+        \ ...\    (backslash-separator) ignored up to next backslash
+        \xNN      hexCharacter
+        \uNNNN    hex UnicodeCharacter
+        \UNNNNNN  hex UnicodeCharacter
+        \<name>   named character
     "
 
-    |ascii nextChar fetchNext|
+    |ascii nextChar fetchNext name pos1|
 
     aCharacter == $n ifTrue:[^ Character nl].
     aCharacter == $r ifTrue:[^ Character return].
@@ -2380,37 +2400,70 @@
     aCharacter == $b ifTrue:[^ Character backspace].
     aCharacter == $f ifTrue:[^ Character ff].
     aCharacter == $g ifTrue:[^ Character bell].
+    aCharacter == $0 ifTrue:[^ Character null].
     aCharacter == $\ ifTrue:[^ aCharacter].
-    aCharacter isSeparator ifTrue:[
-	nextChar := source next.
-	[nextChar notNil and:[nextChar ~~ $\]] whileTrue:[
-	    (nextChar == Character cr) ifTrue:[
-		lineNr := lineNr + 1
-	    ].
-	    nextChar := source next.
-	].
-	^ nil
+    aCharacter == $< ifTrue:[
+        pos1 := source position.
+        name := ''.
+        nextChar := source next.
+        [nextChar notNil and:[nextChar ~~ $>]] whileTrue:[
+            nextChar isLetter ifFalse:[
+                self syntaxError:'letter expected in character name escape in string literal'
+                     position:pos1 to:(source position).
+            ].    
+            name size > 10 ifTrue:[
+                self syntaxError:'long character name escape in string literal'
+                     position:pos1 to:(source position).
+            ].    
+            name := name , nextChar.
+            nextChar := source next.
+        ].
+        nextChar := self characterNamed:name.
+        nextChar isNil ifTrue:[
+            self syntaxError:'invalid character name escape in string literal'
+                 position:pos1 to:(source position).
+        ].
+        ^ nextChar
     ].
-
-    (aCharacter == $x or:[ aCharacter == $u ]) ifTrue:[
-	fetchNext :=
-	    [
-		nextChar := source next.
-		(nextChar notNil and:[nextChar isDigitRadix:16]) ifFalse:[
-		    self syntaxError:'hex digit expected in string literal'
-			 position:(source position) to:(source position).
-		].
-		nextChar digitValue
-	    ].
-
-	ascii := fetchNext value.
-	ascii := (ascii bitShift:4) bitOr:(fetchNext value).
-
-	(aCharacter == $u ) ifTrue:[
-	    ascii := (ascii bitShift:4) bitOr:(fetchNext value).
-	    ascii := (ascii bitShift:4) bitOr:(fetchNext value).
-	].
-	^ Character value:ascii.
+    
+    aCharacter isSeparator ifTrue:[
+        nextChar := source next.
+        [nextChar notNil and:[nextChar ~~ $\]] whileTrue:[
+            (nextChar == Character cr) ifTrue:[
+                lineNr := lineNr + 1
+            ].
+            nextChar := source next.
+        ].
+        ^ nil
+    ].
+
+    (aCharacter == $x 
+    or:[ (aCharacter == $u)
+    or:[ (aCharacter == $U) ]]) ifTrue:[
+        pos1 := source position.
+
+        fetchNext :=
+            [
+                nextChar := source next.
+                (nextChar notNil and:[nextChar isDigitRadix:16]) ifFalse:[
+                    self syntaxError:'hex digit expected in string literal'
+                         position:pos1 to:(source position).
+                ].
+                nextChar digitValue
+            ].
+
+        ascii := fetchNext value.
+        ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+
+        ((aCharacter == $u ) or:[(aCharacter == $U )]) ifTrue:[
+            ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+            ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+            (aCharacter == $U) ifTrue:[
+                ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+                ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+            ].
+        ].
+        ^ Character value:ascii.
     ].
     ^ aCharacter
 
@@ -2427,6 +2480,8 @@
     "
      ParserFlags allowExtendedSTXSyntax:false
     "
+
+    "Modified: / 08-02-2019 / 21:58:55 / Claus Gittinger"
 !
 
 ignoreErrors
@@ -2847,136 +2902,143 @@
     allowNationalCharactersInIdentifier := parserFlags allowNationalCharactersInIdentifier.
 
     hereChar == $_ ifTrue:[
-	"/
-	"/ no need to check for allowUnderscoreInIdentifier here;
-	"/ could not arrive here if it was off
-	"/
-	nextChar := source nextPeek.
-	parserFlags allowOldStyleAssignment ifTrue:[
-	    (nextChar notNil and:[ nextChar isLetterOrDigit or:[nextChar == $_]]) ifFalse:[
-		"oops: a single underscore is an old-style assignement"
-		nextChar == $: ifFalse:[
-		    self warnOldStyleAssignmentAt:tokenPosition.
-		    tokenType := token := $_.
-		    ^ tokenType
-		]
-	    ].
-	].
-	string := '_'.
-	self warnUnderscoreAt:tokenPosition.
-	[nextChar == $_] whileTrue:[
-	    string := string copyWith:$_.
-	    nextChar := source nextPeek.
-	].
-	(nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
-	    string := string , source nextAlphaNumericWord.
-	]
+        "/
+        "/ no need to check for allowUnderscoreInIdentifier here;
+        "/ could not arrive here if it was off
+        "/
+        nextChar := source nextPeek.
+        parserFlags allowOldStyleAssignment ifTrue:[
+            (nextChar notNil and:[ nextChar isLetterOrDigit or:[nextChar == $_]]) ifFalse:[
+                "oops: a single underscore is an old-style assignement"
+                nextChar == $: ifFalse:[
+                    self warnOldStyleAssignmentAt:tokenPosition.
+                    tokenType := token := $_.
+                    ^ tokenType
+                ]
+            ].
+        ].
+        string := '_'.
+        self warnUnderscoreAt:tokenPosition.
+        [nextChar == $_] whileTrue:[
+            string := string copyWith:$_.
+            nextChar := source nextPeek.
+        ].
+        (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
+            string := string , source nextAlphaNumericWord.
+        ]
     ] ifFalse:[
-	nextChar := source peekOrNil.
-	(nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
-	    string := source nextAlphaNumericWord "self nextId".
-	] ifFalse:[
-	    string := ''
-	]
+        nextChar := source peekOrNil.
+        (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
+            string := source nextAlphaNumericWord "self nextId".
+        ] ifFalse:[
+            string := ''
+        ]
     ].
     nextChar := source peekOrNil.
 
+    ((nextChar == $') 
+    and:[ (string size == 1)
+    and:[ parserFlags allowCStrings ]]) ifTrue:[
+        ^ self nextString:$' escapeStyle:#c
+    ].    
+
     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
-	pos := source position + 1.
-	nextChar == $_ ifTrue:[
-	    self warnUnderscoreAt:pos.
-	] ifFalse:[
-	    nextChar == $$ ifTrue:[
-		self warnDollarAt:pos.
-	    ] ifFalse:[
-		nextChar == $§ ifTrue:[
-		    self warnParagraphAt:pos.
-		] ifFalse:[
-		    "/ self warnNationalCharacterAt:pos.
-		]
-	    ]
-	].
-	ok := true.
-	[ok] whileTrue:[
-	    string := string copyWith:nextChar.
-	    nextChar := source nextPeek.
-	    nextChar isNil ifTrue:[
-		ok := false
-	    ] ifFalse:[
-		(nextChar isLetterOrDigit) ifTrue:[
-		    string := string , source nextAlphaNumericWord.
-		    nextChar := source peekOrNil.
-		].
-		ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
-		      or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
-		      or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
-		      or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
-	    ]
-	].
+        pos := source position + 1.
+        nextChar == $_ ifTrue:[
+            self warnUnderscoreAt:pos.
+        ] ifFalse:[
+            nextChar == $$ ifTrue:[
+                self warnDollarAt:pos.
+            ] ifFalse:[
+                nextChar == $§ ifTrue:[
+                    self warnParagraphAt:pos.
+                ] ifFalse:[
+                    "/ self warnNationalCharacterAt:pos.
+                ]
+            ]
+        ].
+        ok := true.
+        [ok] whileTrue:[
+            string := string copyWith:nextChar.
+            nextChar := source nextPeek.
+            nextChar isNil ifTrue:[
+                ok := false
+            ] ifFalse:[
+                (nextChar isLetterOrDigit) ifTrue:[
+                    string := string , source nextAlphaNumericWord.
+                    nextChar := source peekOrNil.
+                ].
+                ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
+                      or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
+                      or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
+                      or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
+            ]
+        ].
     ].
 
     (nextChar == $: and:[scanColonAsKeyword]) ifTrue:[
-	source next.
-	ch2 := source peekOrNil.
-	"/ colon follows - care for '::' (nameSpace separator) or ':=' (assignment)
-	(ch2 == $=) ifFalse:[
-	    (ch2 == $:) ifFalse:[
-		tokenName := token := string copyWith:nextChar.
-		tokenType := #Keyword.
-		inArrayLiteral == true ifTrue:[
-		    (ch2 isLetter
-		    or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]) ifTrue:[
-			"/ kludge: recurse to read the rest.
-			self nextIdentifier.
-			tokenName := token := (string copyWith:nextChar) , token.
-			tokenType ~~ #Keyword ifTrue:[
-			    self syntaxError:'invalid keyword symbol in array constant'
-				    position:tokenPosition to:(source position).
-			].
-			tokenType := #Keyword.
-		    ].
-		].
-		^ tokenType
-	    ].
-	    peekChar := $:.
-	    peekChar2 := $:.
-	] ifTrue:[
-	    peekChar := $:.
-	    peekChar2 := $=.
-	]
+        source next.
+        ch2 := source peekOrNil.
+        "/ colon follows - care for '::' (nameSpace separator) or ':=' (assignment)
+        (ch2 == $=) ifFalse:[
+            (ch2 == $:) ifFalse:[
+                tokenName := token := string copyWith:nextChar.
+                tokenType := #Keyword.
+                inArrayLiteral == true ifTrue:[
+                    (ch2 isLetter
+                    or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]) ifTrue:[
+                        "/ kludge: recurse to read the rest.
+                        self nextIdentifier.
+                        tokenName := token := (string copyWith:nextChar) , token.
+                        tokenType ~~ #Keyword ifTrue:[
+                            self syntaxError:'invalid keyword symbol in array constant'
+                                    position:tokenPosition to:(source position).
+                        ].
+                        tokenType := #Keyword.
+                    ].
+                ].
+                ^ tokenType
+            ].
+            peekChar := $:.
+            peekChar2 := $:.
+        ] ifTrue:[
+            peekChar := $:.
+            peekChar2 := $=.
+        ]
     ] ifFalse:[
-	(nextChar == $. and:[parserFlags allowQualifiedNames]) ifTrue:[
-	    "/ period follows - if next-after character is an identifier character,
-	    "/ make peekSym a #NameSpaceSeparator; otherwise a $.
-	    source next.
-	    ch2 := source peekOrNil.
-	    (ch2 notNil
-	    and:[ch2 isLetter or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]]) ifTrue:[
-		peekChar := #'::'.
-	    ] ifFalse:[
-		peekChar := $.
-	    ].
-	].
+        (nextChar == $. and:[parserFlags allowQualifiedNames]) ifTrue:[
+            "/ period follows - if next-after character is an identifier character,
+            "/ make peekSym a #NameSpaceSeparator; otherwise a $.
+            source next.
+            ch2 := source peekOrNil.
+            (ch2 notNil
+            and:[ch2 isLetter or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]]) ifTrue:[
+                peekChar := #'::'.
+            ] ifFalse:[
+                peekChar := $.
+            ].
+        ].
     ].
 
     nextChar == $- ifTrue:[
-	pos := source position + 1.
-	self
-	    warnPossibleIncompatibility:'add spaces around ''-'' for compatibility with other systems'
-	    position:pos to:pos.
+        pos := source position + 1.
+        self
+            warnPossibleIncompatibility:'add spaces around ''-'' for compatibility with other systems'
+            position:pos to:pos.
     ].
 
     tokenName := token := string.
     (self checkForKeyword:string) ifFalse:[
-	tokenType := #Identifier.
+        tokenType := #Identifier.
     ].
     ^ tokenType
 
     "Created: / 13-09-1995 / 12:56:42 / claus"
     "Modified: / 17-11-2016 / 09:19:46 / cg"
+    "Modified: / 08-02-2019 / 19:09:51 / Claus Gittinger"
 !
 
 nextMantissa:radix
@@ -3297,6 +3359,38 @@
 nextString:delimiter
     "a quote has been scanned; scan the string (caring for doubled quotes)"
 
+    ^ self nextString:delimiter escapeStyle:nil
+
+    "
+     old style ST80 string (no escapes):
+        'hello\new world'
+
+     ParserFlags allowCStrings:true.
+     
+     new style STX c-string (c escapes):
+        c'hello\nnew world'
+        c'hello\tnew world'
+        c'hello\<tab>new world'
+        c'\f'
+     
+     ParserFlags allowCStrings:false.
+    "
+
+    "Created: / 01-08-2006 / 14:56:07 / cg"
+    "Modified: / 22-08-2006 / 14:10:26 / cg"
+    "Modified (comment): / 08-02-2019 / 21:56:19 / Claus Gittinger"
+!
+
+nextString:delimiter escapeStyle:escapeStyle
+    "a quote has been scanned; scan the string (caring for doubled quotes).
+     escapeStyle may be:
+        nil - old style ST80 strings (no character escapes)
+        #c  - C-style escapes: 
+                        \n,\t,\r,\b,\xXX,\uXXXX,\UXXXXXX,
+        #x  - extended-style escapes: 
+                        c-style PLUS \<nul>,\<...>
+    "
+
     |nextChar string pos
      index "{ Class: SmallInteger }"
      len   "{ Class: SmallInteger }"
@@ -3329,7 +3423,7 @@
                     inString := false
                 ]
             ] ifFalse:[
-                parserFlags allowExtendedSTXSyntax == true ifTrue:[
+                (parserFlags allowExtendedSTXSyntax == true or:[escapeStyle notNil]) ifTrue:[
                     (nextChar == $\) ifTrue:[
                         peekChar := source peekOrNil.
                         peekChar notNil ifTrue:[
@@ -3366,9 +3460,7 @@
     tokenType := #String.
     ^ tokenType
 
-    "Created: / 01-08-2006 / 14:56:07 / cg"
-    "Modified: / 22-08-2006 / 14:10:26 / cg"
-    "Modified (comment): / 07-08-2018 / 07:40:23 / Claus Gittinger"
+    "Created: / 08-02-2019 / 19:07:57 / Claus Gittinger"
 !
 
 nextSymbolAfterHash