Scanner.st
changeset 4432 fef0c840421e
parent 4426 49123df91ffa
child 4433 ad2645bee258
equal deleted inserted replaced
4431:5cbe584d6f60 4432:fef0c840421e
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 1989 by Claus Gittinger
     4  COPYRIGHT (c) 1989 by Claus Gittinger
     3 	      All Rights Reserved
     5 	      All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
   141 !
   143 !
   142 
   144 
   143 extendedBinarySelectorCharacters
   145 extendedBinarySelectorCharacters
   144     "return a collection of characters which are optionally allowed in binary selectors"
   146     "return a collection of characters which are optionally allowed in binary selectors"
   145 
   147 
   146     "/ ^ '±×·÷«»'.
   148     "/ ^ '±×·÷«»'.
   147     ^ String
   149     ^ String
   148 	with:(Character value:16rB1)  "/ plus-minus
   150 	with:(Character value:16rB1)  "/ plus-minus
   149 	with:(Character value:16rD7)  "/ times
   151 	with:(Character value:16rD7)  "/ times
   150 	with:(Character value:16rB7)  "/ centered dot
   152 	with:(Character value:16rB7)  "/ centered dot
   151 	with:(Character value:16rF7)  "/ divide
   153 	with:(Character value:16rF7)  "/ divide
  1854 
  1856 
  1855     "Modified: 23.5.1997 / 12:16:48 / cg"
  1857     "Modified: 23.5.1997 / 12:16:48 / cg"
  1856 !
  1858 !
  1857 
  1859 
  1858 warnParagraphAt:position
  1860 warnParagraphAt:position
  1859     "warn about §-character in an identifier"
  1861     "warn about §-character in an identifier"
  1860 
  1862 
  1861     ignoreWarnings ifFalse:[
  1863     ignoreWarnings ifFalse:[
  1862 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1864 	"/ didWarnAboutParagraphInIdentifier ifFalse:[
  1863 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1865 	    parserFlags warnParagraphInIdentifier ifTrue:[
  1864 		self
  1866 		self
  1865 		    warning:'§-characters in identifiers/symbols are nonportable'
  1867 		    warning:'§-characters in identifiers/symbols are nonportable'
  1866 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1868 		    doNotShowAgainAction:[ ParserFlags warnParagraphInIdentifier:false ]
  1867 		    position:position to:position.
  1869 		    position:position to:position.
  1868 		"
  1870 		"
  1869 		 only warn once (per method)
  1871 		 only warn once (per method)
  1870 		"
  1872 		"
  2993     ].
  2995     ].
  2994     nextChar := source peekOrNil.
  2996     nextChar := source peekOrNil.
  2995 
  2997 
  2996     ((nextChar == $') 
  2998     ((nextChar == $') 
  2997       and:[ (string size == 1)
  2999       and:[ (string size == 1)
  2998       and:[ (parserFlags allowCStrings 
  3000       and:[ ((parserFlags allowCStrings and:[string = 'c'])
  2999             or:[parserFlags allowEStrings
  3001             or:[(parserFlags allowEStrings and:[string = 'e'])
  3000             or:[parserFlags allowExtendedSTXSyntax ]]) ]]
  3002             or:[(parserFlags allowRStrings and:[string = 'r'])
       
  3003             or:[parserFlags allowExtendedSTXSyntax ]]]) ]]
  3001     ) ifTrue:[
  3004     ) ifTrue:[
  3002         source next.
  3005         source next.
  3003         ^ self xnextString:$' escapeStyle:string
  3006         ^ self xnextString:$' escapeStyle:string
  3004     ].    
  3007     ].    
  3005 
  3008 
  3006     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3009     (((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3007     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  3010     or:[ (allowDollarInIdentifier and:[nextChar == $$ ])
  3008     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  3011     or:[ (nextChar == $§ and:[ parserFlags allowParagraphInIdentifier])
  3009     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  3012     or:[ allowNationalCharactersInIdentifier and:[ nextChar notNil and:[nextChar isNationalLetter]]]]]) ifTrue:[
  3010         pos := source position + 1.
  3013         pos := source position + 1.
  3011         nextChar == $_ ifTrue:[
  3014         nextChar == $_ ifTrue:[
  3012             self warnUnderscoreAt:pos.
  3015             self warnUnderscoreAt:pos.
  3013         ] ifFalse:[
  3016         ] ifFalse:[
  3014             nextChar == $$ ifTrue:[
  3017             nextChar == $$ ifTrue:[
  3015                 self warnDollarAt:pos.
  3018                 self warnDollarAt:pos.
  3016             ] ifFalse:[
  3019             ] ifFalse:[
  3017                 nextChar == $§ ifTrue:[
  3020                 nextChar == $§ ifTrue:[
  3018                     self warnParagraphAt:pos.
  3021                     self warnParagraphAt:pos.
  3019                 ] ifFalse:[
  3022                 ] ifFalse:[
  3020                     "/ self warnNationalCharacterAt:pos.
  3023                     "/ self warnNationalCharacterAt:pos.
  3021                 ]
  3024                 ]
  3022             ]
  3025             ]
  3032                     string := string , source nextAlphaNumericWord.
  3035                     string := string , source nextAlphaNumericWord.
  3033                     nextChar := source peekOrNil.
  3036                     nextChar := source peekOrNil.
  3034                 ].
  3037                 ].
  3035                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3038                 ok := ((nextChar == $_) and:[allowUnderscoreInIdentifier])
  3036                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3039                       or:[((nextChar == $$ ) and:[allowDollarInIdentifier])
  3037                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3040                       or:[((nextChar == $§ ) and:[parserFlags allowParagraphInIdentifier])
  3038                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3041                       or:[(nextChar notNil and:[nextChar isNationalLetter]) and:[allowNationalCharactersInIdentifier]]]].
  3039             ]
  3042             ]
  3040         ].
  3043         ].
  3041     ].
  3044     ].
  3042 
  3045 
  3097     ].
  3100     ].
  3098     ^ tokenType
  3101     ^ tokenType
  3099 
  3102 
  3100     "Created: / 13-09-1995 / 12:56:42 / claus"
  3103     "Created: / 13-09-1995 / 12:56:42 / claus"
  3101     "Modified: / 17-11-2016 / 09:19:46 / cg"
  3104     "Modified: / 17-11-2016 / 09:19:46 / cg"
  3102     "Modified: / 23-05-2019 / 10:44:00 / Claus Gittinger"
  3105     "Modified: / 03-06-2019 / 11:14:55 / Claus Gittinger"
  3103 !
  3106 !
  3104 
  3107 
  3105 nextMantissa:radix
  3108 nextMantissa:radix
  3106     "read the mantissa of a radix number"
  3109     "read the mantissa of a radix number"
  3107 
  3110 
  3438         c'a\0b'
  3441         c'a\0b'
  3439         c'a\n\\\nb'
  3442         c'a\n\\\nb'
  3440         c'a\r\\\nb'
  3443         c'a\r\\\nb'
  3441      
  3444      
  3442      ParserFlags allowCStrings:false.
  3445      ParserFlags allowCStrings:false.
       
  3446 
       
  3447 
       
  3448      ParserFlags allowRStrings:true.
       
  3449 
       
  3450      STX regex:
       
  3451         r'a+b+'
       
  3452 
       
  3453      ParserFlags allowRStrings:false.
  3443     "
  3454     "
  3444 
  3455 
  3445     "Created: / 01-08-2006 / 14:56:07 / cg"
  3456     "Created: / 01-08-2006 / 14:56:07 / cg"
  3446     "Modified: / 22-08-2006 / 14:10:26 / cg"
  3457     "Modified: / 22-08-2006 / 14:10:26 / cg"
  3447     "Modified: / 22-05-2019 / 20:32:14 / Claus Gittinger"
  3458     "Modified: / 22-05-2019 / 20:32:14 / Claus Gittinger"
       
  3459     "Modified (comment): / 03-06-2019 / 11:16:00 / Claus Gittinger"
  3448 !
  3460 !
  3449 
  3461 
  3450 nextString:delimiter escapeStyle:escapeStyle
  3462 nextString:delimiter escapeStyle:escapeStyle
  3451     "a quote has been scanned; scan the string (caring for doubled quotes).
  3463     "a quote has been scanned; scan the string (caring for doubled quotes).
  3452      escapeStyle may be:
  3464      escapeStyle may be:
  3727 		tok notNil ifTrue:[
  3739 		tok notNil ifTrue:[
  3728 		    ^ tok
  3740 		    ^ tok
  3729 		].
  3741 		].
  3730 		"/ a nil token means: continue reading
  3742 		"/ a nil token means: continue reading
  3731 	    ] ifFalse:[
  3743 	    ] ifFalse:[
  3732 		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
  3744 		(ch == $§ and:[parserFlags allowParagraphInIdentifier]) ifTrue:[
  3733 		    tok := self nextIdentifier.
  3745 		    tok := self nextIdentifier.
  3734 		    tok notNil ifTrue:[
  3746 		    tok notNil ifTrue:[
  3735 			^ tok
  3747 			^ tok
  3736 		    ].
  3748 		    ].
  3737 		    "/ a nil token means: continue reading
  3749 		    "/ a nil token means: continue reading
  3971                         
  3983                         
  3972         'e' - C-style plus embedded escapes:
  3984         'e' - C-style plus embedded escapes:
  3973                         e'...{ expr1 } ... { exprN }' will generate:
  3985                         e'...{ expr1 } ... { exprN }' will generate:
  3974                         '...%1 ... %N' bindWithArguments:{ expr1 . ... . exprN }
  3986                         '...%1 ... %N' bindWithArguments:{ expr1 . ... . exprN }
  3975                         
  3987                         
       
  3988         'r' - regex
       
  3989 
  3976         'x' - extended-style escapes:
  3990         'x' - extended-style escapes:
  3977                         as yet unsupported
  3991                         as yet unsupported
  3978         'r' - regex
       
  3979                         as yet unsupported
       
  3980 
       
  3981     "
  3992     "
  3982 
  3993 
  3983     |nextChar string pos
  3994     |nextChar string pos
  3984      index "{ Class: SmallInteger }"
  3995      index "{ Class: SmallInteger }"
  3985      len   "{ Class: SmallInteger }"
  3996      len   "{ Class: SmallInteger }"
  3986      inString peekChar
  3997      inString peekChar
  3987      isCString isEString|
  3998      isCString isEString isRString|
  3988 
  3999 
  3989     isEString := (escapeStyle = 'e').
  4000     isEString := (escapeStyle = 'e').
  3990     isCString := (escapeStyle = 'c').
  4001     isCString := (escapeStyle = 'c').
       
  4002     isRString := (escapeStyle = 'r').
  3991     
  4003     
  3992     string := String uninitializedNew:20.
  4004     string := String uninitializedNew:20.
  3993     len := 20.
  4005     len := 20.
  3994     index := 1.
  4006     index := 1.
  3995     pos := source position.
  4007     pos := source position.
  4013                     source next
  4025                     source next
  4014                 ] ifFalse:[
  4026                 ] ifFalse:[
  4015                     inString := false
  4027                     inString := false
  4016                 ]
  4028                 ]
  4017             ] ifFalse:[
  4029             ] ifFalse:[
  4018                 escapeStyle notNil ifTrue:[
  4030                 (escapeStyle notNil and:[isRString not]) ifTrue:[
  4019                     ((nextChar == ${) and:[isEString]) ifTrue:[
  4031                     ((nextChar == ${) and:[isEString]) ifTrue:[
  4020                         "/ bail out, to read one expression
  4032                         "/ bail out, to read one expression
  4021                         tokenValue := token := string copyTo:(index - 1).
  4033                         tokenValue := token := string copyTo:(index - 1).
  4022                         tokenType := #StringFragment.
  4034                         tokenType := #StringFragment.
  4023                         ^ tokenType
  4035                         ^ tokenType
  4053             nextChar := source next
  4065             nextChar := source next
  4054         ]
  4066         ]
  4055     ].
  4067     ].
  4056 
  4068 
  4057     tokenValue := token := string copyTo:(index - 1).
  4069     tokenValue := token := string copyTo:(index - 1).
  4058     tokenType := #String.
  4070     tokenType := isRString ifTrue:[#RegexString] ifFalse:[#String].
  4059     ^ tokenType
  4071     ^ tokenType
  4060 
  4072 
  4061     "Created: / 22-05-2019 / 20:31:36 / Claus Gittinger"
  4073     "Created: / 22-05-2019 / 20:31:36 / Claus Gittinger"
  4062     "Modified: / 23-05-2019 / 10:45:19 / Claus Gittinger"
  4074     "Modified: / 03-06-2019 / 11:10:24 / Claus Gittinger"
  4063 ! !
  4075 ! !
  4064 
  4076 
  4065 !Scanner::Comment methodsFor:'accessing'!
  4077 !Scanner::Comment methodsFor:'accessing'!
  4066 
  4078 
  4067 commentString
  4079 commentString