--- a/Scanner.st Fri Feb 08 18:45:32 2019 +0100
+++ b/Scanner.st Fri Feb 08 21:59:45 2019 +0100
@@ -25,8 +25,8 @@
didWarnAboutUnderscoreInIdentifier didWarnAboutOldStyleAssignment
didWarnAboutDollarInIdentifier didWarnAboutPeriodInSymbol
unicodeActions'
- classVariableNames:'DefaultTypeArray DefaultActionArray EmptySourceNotificationSignal
- DefaultUnicodeActions'
+ classVariableNames:'DefaultActionArray DefaultTypeArray DefaultUnicodeActions
+ EmptySourceNotificationSignal'
poolDictionaries:''
category:'System-Compiler'
!
@@ -220,7 +220,7 @@
"kludge: action is nextColonOrAssign, but type is special"
typeArray at:($: codePoint) put:#special.
- actionArray at:($' codePoint) put:[:s :char | s nextString:char].
+ actionArray at:($' codePoint) put:[:s :char | s nextString:char escapeStyle:nil].
actionArray at:($$ codePoint) put:[:s :char | s nextCharacter].
actionArray at:($# codePoint) put:[:s :char | s nextHash].
actionArray at:($!! codePoint) put:[:s :char | s nextExcla].
@@ -242,7 +242,7 @@
"
"Modified: / 02-07-2017 / 01:11:27 / cg"
- "Modified: / 07-08-2018 / 07:47:38 / Claus Gittinger"
+ "Modified: / 08-02-2019 / 19:13:32 / Claus Gittinger"
! !
!Scanner class methodsFor:'instance creation'!
@@ -2271,6 +2271,23 @@
^ self
!
+characterNamed:name
+ |idx|
+
+ idx := #(
+ 'nul' 'soh' 'stx' 'etx' 'eot' 'enq' 'ack' 'bel'
+ 'bs' 'ht' 'lf' 'vt' 'ff' 'cr' 'so' 'si'
+ 'dle' 'dc1' 'dc2' 'dc3' 'dc4' 'nak' 'syn' 'etb'
+ 'can' 'em' 'sub' 'esc' 'fs' 'gs' 'rs' 'us' ) indexOf:name.
+ idx == 0 ifTrue:[
+ name = 'tab' ifTrue:[ ^ Character tab ].
+ ^ nil
+ ].
+ ^ Character value:idx-1
+
+ "Created: / 08-02-2019 / 19:12:23 / Claus Gittinger"
+!
+
checkForKeyword:string
"check if string is a keyword (as opposed to an identifier).
That is, its one of 'self', 'super', 'nil', 'true', 'false',
@@ -2359,20 +2376,23 @@
much like character escapes in C-literals;
expands:
- \n newLine
- \r return
- \t tab
- \b backspace
- \f formfeed
- \g bell
-
- \\ backSlash
- \ ...\ (backslash-separator) ignored up to next backslash
- \xNN hexCharacter
- \uNNNN hex UnicodeCharacter
+ \n newLine
+ \r return
+ \t tab
+ \b backspace
+ \f formfeed
+ \g bell
+ \0 nul
+
+ \\ backSlash
+ \ ...\ (backslash-separator) ignored up to next backslash
+ \xNN hexCharacter
+ \uNNNN hex UnicodeCharacter
+ \UNNNNNN hex UnicodeCharacter
+ \<name> named character
"
- |ascii nextChar fetchNext|
+ |ascii nextChar fetchNext name pos1|
aCharacter == $n ifTrue:[^ Character nl].
aCharacter == $r ifTrue:[^ Character return].
@@ -2380,37 +2400,70 @@
aCharacter == $b ifTrue:[^ Character backspace].
aCharacter == $f ifTrue:[^ Character ff].
aCharacter == $g ifTrue:[^ Character bell].
+ aCharacter == $0 ifTrue:[^ Character null].
aCharacter == $\ ifTrue:[^ aCharacter].
- aCharacter isSeparator ifTrue:[
- nextChar := source next.
- [nextChar notNil and:[nextChar ~~ $\]] whileTrue:[
- (nextChar == Character cr) ifTrue:[
- lineNr := lineNr + 1
- ].
- nextChar := source next.
- ].
- ^ nil
+ aCharacter == $< ifTrue:[
+ pos1 := source position.
+ name := ''.
+ nextChar := source next.
+ [nextChar notNil and:[nextChar ~~ $>]] whileTrue:[
+ nextChar isLetter ifFalse:[
+ self syntaxError:'letter expected in character name escape in string literal'
+ position:pos1 to:(source position).
+ ].
+ name size > 10 ifTrue:[
+ self syntaxError:'long character name escape in string literal'
+ position:pos1 to:(source position).
+ ].
+ name := name , nextChar.
+ nextChar := source next.
+ ].
+ nextChar := self characterNamed:name.
+ nextChar isNil ifTrue:[
+ self syntaxError:'invalid character name escape in string literal'
+ position:pos1 to:(source position).
+ ].
+ ^ nextChar
].
-
- (aCharacter == $x or:[ aCharacter == $u ]) ifTrue:[
- fetchNext :=
- [
- nextChar := source next.
- (nextChar notNil and:[nextChar isDigitRadix:16]) ifFalse:[
- self syntaxError:'hex digit expected in string literal'
- position:(source position) to:(source position).
- ].
- nextChar digitValue
- ].
-
- ascii := fetchNext value.
- ascii := (ascii bitShift:4) bitOr:(fetchNext value).
-
- (aCharacter == $u ) ifTrue:[
- ascii := (ascii bitShift:4) bitOr:(fetchNext value).
- ascii := (ascii bitShift:4) bitOr:(fetchNext value).
- ].
- ^ Character value:ascii.
+
+ aCharacter isSeparator ifTrue:[
+ nextChar := source next.
+ [nextChar notNil and:[nextChar ~~ $\]] whileTrue:[
+ (nextChar == Character cr) ifTrue:[
+ lineNr := lineNr + 1
+ ].
+ nextChar := source next.
+ ].
+ ^ nil
+ ].
+
+ (aCharacter == $x
+ or:[ (aCharacter == $u)
+ or:[ (aCharacter == $U) ]]) ifTrue:[
+ pos1 := source position.
+
+ fetchNext :=
+ [
+ nextChar := source next.
+ (nextChar notNil and:[nextChar isDigitRadix:16]) ifFalse:[
+ self syntaxError:'hex digit expected in string literal'
+ position:pos1 to:(source position).
+ ].
+ nextChar digitValue
+ ].
+
+ ascii := fetchNext value.
+ ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+
+ ((aCharacter == $u ) or:[(aCharacter == $U )]) ifTrue:[
+ ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+ ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+ (aCharacter == $U) ifTrue:[
+ ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+ ascii := (ascii bitShift:4) bitOr:(fetchNext value).
+ ].
+ ].
+ ^ Character value:ascii.
].
^ aCharacter
@@ -2427,6 +2480,8 @@
"
ParserFlags allowExtendedSTXSyntax:false
"
+
+ "Modified: / 08-02-2019 / 21:58:55 / Claus Gittinger"
!
ignoreErrors
@@ -2847,136 +2902,143 @@
allowNationalCharactersInIdentifier := parserFlags allowNationalCharactersInIdentifier.
hereChar == $_ ifTrue:[
- "/
- "/ no need to check for allowUnderscoreInIdentifier here;
- "/ could not arrive here if it was off
- "/
- nextChar := source nextPeek.
- parserFlags allowOldStyleAssignment ifTrue:[
- (nextChar notNil and:[ nextChar isLetterOrDigit or:[nextChar == $_]]) ifFalse:[
- "oops: a single underscore is an old-style assignement"
- nextChar == $: ifFalse:[
- self warnOldStyleAssignmentAt:tokenPosition.
- tokenType := token := $_.
- ^ tokenType
- ]
- ].
- ].
- string := '_'.
- self warnUnderscoreAt:tokenPosition.
- [nextChar == $_] whileTrue:[
- string := string copyWith:$_.
- nextChar := source nextPeek.
- ].
- (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
- string := string , source nextAlphaNumericWord.
- ]
+ "/
+ "/ no need to check for allowUnderscoreInIdentifier here;
+ "/ could not arrive here if it was off
+ "/
+ nextChar := source nextPeek.
+ parserFlags allowOldStyleAssignment ifTrue:[
+ (nextChar notNil and:[ nextChar isLetterOrDigit or:[nextChar == $_]]) ifFalse:[
+ "oops: a single underscore is an old-style assignement"
+ nextChar == $: ifFalse:[
+ self warnOldStyleAssignmentAt:tokenPosition.
+ tokenType := token := $_.
+ ^ tokenType
+ ]
+ ].
+ ].
+ string := '_'.
+ self warnUnderscoreAt:tokenPosition.
+ [nextChar == $_] whileTrue:[
+ string := string copyWith:$_.
+ nextChar := source nextPeek.
+ ].
+ (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
+ string := string , source nextAlphaNumericWord.
+ ]
] ifFalse:[
- nextChar := source peekOrNil.
- (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
- string := source nextAlphaNumericWord "self nextId".
- ] ifFalse:[
- string := ''
- ]
+ nextChar := source peekOrNil.
+ (nextChar notNil and:[nextChar isLetterOrDigit]) ifTrue:[
+ string := source nextAlphaNumericWord "self nextId".
+ ] ifFalse:[
+ string := ''
+ ]
].
nextChar := source peekOrNil.
+ ((nextChar == $')
+ and:[ (string size == 1)
+ and:[ parserFlags allowCStrings ]]) ifTrue:[
+ ^ self nextString:$' escapeStyle:#c
+ ].
+
(((nextChar == $_) and:[allowUnderscoreInIdentifier])
or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
- pos := source position + 1.
- nextChar == $_ ifTrue:[
- self warnUnderscoreAt:pos.
- ] ifFalse:[
- nextChar == $$ ifTrue:[
- self warnDollarAt:pos.
- ] ifFalse:[
- nextChar == $§ ifTrue:[
- self warnParagraphAt:pos.
- ] ifFalse:[
- "/ self warnNationalCharacterAt:pos.
- ]
- ]
- ].
- ok := true.
- [ok] whileTrue:[
- string := string copyWith:nextChar.
- nextChar := source nextPeek.
- nextChar isNil ifTrue:[
- ok := false
- ] ifFalse:[
- (nextChar isLetterOrDigit) ifTrue:[
- string := string , source nextAlphaNumericWord.
- nextChar := source peekOrNil.
- ].
- ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
- or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
- or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
- or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
- ]
- ].
+ pos := source position + 1.
+ nextChar == $_ ifTrue:[
+ self warnUnderscoreAt:pos.
+ ] ifFalse:[
+ nextChar == $$ ifTrue:[
+ self warnDollarAt:pos.
+ ] ifFalse:[
+ nextChar == $§ ifTrue:[
+ self warnParagraphAt:pos.
+ ] ifFalse:[
+ "/ self warnNationalCharacterAt:pos.
+ ]
+ ]
+ ].
+ ok := true.
+ [ok] whileTrue:[
+ string := string copyWith:nextChar.
+ nextChar := source nextPeek.
+ nextChar isNil ifTrue:[
+ ok := false
+ ] ifFalse:[
+ (nextChar isLetterOrDigit) ifTrue:[
+ string := string , source nextAlphaNumericWord.
+ nextChar := source peekOrNil.
+ ].
+ ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
+ or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
+ or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
+ or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
+ ]
+ ].
].
(nextChar == $: and:[scanColonAsKeyword]) ifTrue:[
- source next.
- ch2 := source peekOrNil.
- "/ colon follows - care for '::' (nameSpace separator) or ':=' (assignment)
- (ch2 == $=) ifFalse:[
- (ch2 == $:) ifFalse:[
- tokenName := token := string copyWith:nextChar.
- tokenType := #Keyword.
- inArrayLiteral == true ifTrue:[
- (ch2 isLetter
- or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]) ifTrue:[
- "/ kludge: recurse to read the rest.
- self nextIdentifier.
- tokenName := token := (string copyWith:nextChar) , token.
- tokenType ~~ #Keyword ifTrue:[
- self syntaxError:'invalid keyword symbol in array constant'
- position:tokenPosition to:(source position).
- ].
- tokenType := #Keyword.
- ].
- ].
- ^ tokenType
- ].
- peekChar := $:.
- peekChar2 := $:.
- ] ifTrue:[
- peekChar := $:.
- peekChar2 := $=.
- ]
+ source next.
+ ch2 := source peekOrNil.
+ "/ colon follows - care for '::' (nameSpace separator) or ':=' (assignment)
+ (ch2 == $=) ifFalse:[
+ (ch2 == $:) ifFalse:[
+ tokenName := token := string copyWith:nextChar.
+ tokenType := #Keyword.
+ inArrayLiteral == true ifTrue:[
+ (ch2 isLetter
+ or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]) ifTrue:[
+ "/ kludge: recurse to read the rest.
+ self nextIdentifier.
+ tokenName := token := (string copyWith:nextChar) , token.
+ tokenType ~~ #Keyword ifTrue:[
+ self syntaxError:'invalid keyword symbol in array constant'
+ position:tokenPosition to:(source position).
+ ].
+ tokenType := #Keyword.
+ ].
+ ].
+ ^ tokenType
+ ].
+ peekChar := $:.
+ peekChar2 := $:.
+ ] ifTrue:[
+ peekChar := $:.
+ peekChar2 := $=.
+ ]
] ifFalse:[
- (nextChar == $. and:[parserFlags allowQualifiedNames]) ifTrue:[
- "/ period follows - if next-after character is an identifier character,
- "/ make peekSym a #NameSpaceSeparator; otherwise a $.
- source next.
- ch2 := source peekOrNil.
- (ch2 notNil
- and:[ch2 isLetter or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]]) ifTrue:[
- peekChar := #'::'.
- ] ifFalse:[
- peekChar := $.
- ].
- ].
+ (nextChar == $. and:[parserFlags allowQualifiedNames]) ifTrue:[
+ "/ period follows - if next-after character is an identifier character,
+ "/ make peekSym a #NameSpaceSeparator; otherwise a $.
+ source next.
+ ch2 := source peekOrNil.
+ (ch2 notNil
+ and:[ch2 isLetter or:[ch2 == $_ and:[allowUnderscoreInIdentifier]]]) ifTrue:[
+ peekChar := #'::'.
+ ] ifFalse:[
+ peekChar := $.
+ ].
+ ].
].
nextChar == $- ifTrue:[
- pos := source position + 1.
- self
- warnPossibleIncompatibility:'add spaces around ''-'' for compatibility with other systems'
- position:pos to:pos.
+ pos := source position + 1.
+ self
+ warnPossibleIncompatibility:'add spaces around ''-'' for compatibility with other systems'
+ position:pos to:pos.
].
tokenName := token := string.
(self checkForKeyword:string) ifFalse:[
- tokenType := #Identifier.
+ tokenType := #Identifier.
].
^ tokenType
"Created: / 13-09-1995 / 12:56:42 / claus"
"Modified: / 17-11-2016 / 09:19:46 / cg"
+ "Modified: / 08-02-2019 / 19:09:51 / Claus Gittinger"
!
nextMantissa:radix
@@ -3297,6 +3359,38 @@
nextString:delimiter
"a quote has been scanned; scan the string (caring for doubled quotes)"
+ ^ self nextString:delimiter escapeStyle:nil
+
+ "
+ old style ST80 string (no escapes):
+ 'hello\new world'
+
+ ParserFlags allowCStrings:true.
+
+ new style STX c-string (c escapes):
+ c'hello\nnew world'
+ c'hello\tnew world'
+ c'hello\<tab>new world'
+ c'\f'
+
+ ParserFlags allowCStrings:false.
+ "
+
+ "Created: / 01-08-2006 / 14:56:07 / cg"
+ "Modified: / 22-08-2006 / 14:10:26 / cg"
+ "Modified (comment): / 08-02-2019 / 21:56:19 / Claus Gittinger"
+!
+
+nextString:delimiter escapeStyle:escapeStyle
+ "a quote has been scanned; scan the string (caring for doubled quotes).
+ escapeStyle may be:
+ nil - old style ST80 strings (no character escapes)
+ #c - C-style escapes:
+ \n,\t,\r,\b,\xXX,\uXXXX,\UXXXXXX,
+ #x - extended-style escapes:
+ c-style PLUS \<nul>,\<...>
+ "
+
|nextChar string pos
index "{ Class: SmallInteger }"
len "{ Class: SmallInteger }"
@@ -3329,7 +3423,7 @@
inString := false
]
] ifFalse:[
- parserFlags allowExtendedSTXSyntax == true ifTrue:[
+ (parserFlags allowExtendedSTXSyntax == true or:[escapeStyle notNil]) ifTrue:[
(nextChar == $\) ifTrue:[
peekChar := source peekOrNil.
peekChar notNil ifTrue:[
@@ -3366,9 +3460,7 @@
tokenType := #String.
^ tokenType
- "Created: / 01-08-2006 / 14:56:07 / cg"
- "Modified: / 22-08-2006 / 14:10:26 / cg"
- "Modified (comment): / 07-08-2018 / 07:40:23 / Claus Gittinger"
+ "Created: / 08-02-2019 / 19:07:57 / Claus Gittinger"
!
nextSymbolAfterHash