Scanner.st
changeset 4291 bd7354a6b7d7
parent 4276 05b51337c941
child 4303 4123d362d0f1
equal deleted inserted replaced
4290:9d83c5d83330 4291:bd7354a6b7d7
   176     block := [:s :char | s nextNumber].
   176     block := [:s :char | s nextNumber].
   177     actionArray from:($0 codePoint) to:($9 codePoint) put:block.
   177     actionArray from:($0 codePoint) to:($9 codePoint) put:block.
   178 
   178 
   179     block := [:s :char | s nextSpecial].
   179     block := [:s :char | s nextSpecial].
   180     self binarySelectorCharacters do:[:binop |
   180     self binarySelectorCharacters do:[:binop |
   181 	|codePoint|
   181         |codePoint|
   182 	codePoint := binop codePoint.
   182         codePoint := binop codePoint.
   183 	codePoint <= typeArray size ifTrue:[
   183         codePoint <= typeArray size ifTrue:[
   184 	    typeArray at:codePoint put:#special.
   184             typeArray at:codePoint put:#special.
   185 	    actionArray at:codePoint put:block
   185             actionArray at:codePoint put:block
   186 	] ifFalse:[
   186         ] ifFalse:[
   187 	    unicodeTypes at:codePoint put:#extendedSpecial.
   187             unicodeTypes at:codePoint put:#extendedSpecial.
   188 	    unicodeActions at:codePoint put:block
   188             unicodeActions at:codePoint put:block
   189 	].
   189         ].
   190     ].
   190     ].
   191 
   191 
   192     block := [:s :char | s nextExtendedSpecial:char].
   192     block := [:s :char | s nextExtendedSpecial:char].
   193     self extendedBinarySelectorCharacters do:[:binop |
   193     self extendedBinarySelectorCharacters do:[:binop |
   194 	|codePoint|
   194         |codePoint|
   195 	codePoint := binop codePoint.
   195         codePoint := binop codePoint.
   196 	codePoint <= typeArray size ifTrue:[
   196         codePoint <= typeArray size ifTrue:[
   197 	    typeArray at:codePoint put:#extendedSpecial.
   197             typeArray at:codePoint put:#extendedSpecial.
   198 	    actionArray at:codePoint put:block
   198             actionArray at:codePoint put:block
   199 	] ifFalse:[
   199         ] ifFalse:[
   200 	    unicodeTypes at:codePoint put:#extendedSpecial.
   200             unicodeTypes at:codePoint put:#extendedSpecial.
   201 	    unicodeActions at:codePoint put:block
   201             unicodeActions at:codePoint put:block
   202 	].
   202         ].
   203     ].
   203     ].
   204 
   204 
   205     "/ that one is a special case (both binarySelector AND syntax).
   205     "/ that one is a special case (both binarySelector AND syntax).
   206     typeArray at:($| codePoint) put:nil.
   206     typeArray at:($| codePoint) put:nil.
   207 
   207 
   208     block := [:s :char | s nextToken:char].
   208     block := [:s :char | s nextToken:char].
   209     ';.^|()[]{}' do:[:ch |
   209     ';.^|()[]{}' do:[:ch |
   210 	actionArray at:(ch codePoint) put:block
   210         actionArray at:(ch codePoint) put:block
   211     ].
   211     ].
   212 
   212 
   213     block := [:s :char | s nextIdentifier].
   213     block := [:s :char | s nextIdentifier].
   214     actionArray from:($a codePoint) to:($z codePoint) put:block.
   214     actionArray from:($a codePoint) to:($z codePoint) put:block.
   215     actionArray from:($A codePoint) to:($Z codePoint) put:block.
   215     actionArray from:($A codePoint) to:($Z codePoint) put:block.
   225     actionArray at:($# codePoint) put:[:s :char | s nextHash].
   225     actionArray at:($# codePoint) put:[:s :char | s nextHash].
   226     actionArray at:($!! codePoint) put:[:s :char | s nextExcla].
   226     actionArray at:($!! codePoint) put:[:s :char | s nextExcla].
   227     actionArray at:($% codePoint) put:[:s :char | s nextPrimitive].
   227     actionArray at:($% codePoint) put:[:s :char | s nextPrimitive].
   228     actionArray at:($: codePoint) put:[:s :char | s nextColonOrAssign].
   228     actionArray at:($: codePoint) put:[:s :char | s nextColonOrAssign].
   229     actionArray at:($_ codePoint) put:[:s :char | s nextUnderline].
   229     actionArray at:($_ codePoint) put:[:s :char | s nextUnderline].
   230 
   230     "/ an experiment to allow 'special' identifiers (in backticks, allowing arbitrary characters inside)
       
   231     "/ actionArray at:($` codePoint) put:[:s :char | s nextBacktickIdentifier].
       
   232     
   231     unicodeActions at:16r2190 "<- left arrow, U+2190 (hex code point; keys are code points as in the loops above)" put:[:s :char | s nextAssignmentArrow].
   233     unicodeActions at:16r2190 "<- left arrow, U+2190 (hex code point; keys are code points as in the loops above)" put:[:s :char | s nextAssignmentArrow].
   232 
   234 
   233     ActionArray := DefaultActionArray := actionArray.
   235     ActionArray := DefaultActionArray := actionArray.
   234     TypeArray := DefaultTypeArray := typeArray.
   236     TypeArray := DefaultTypeArray := typeArray.
   235     UnicodeActions := DefaultUnicodeActions := unicodeActions.
   237     UnicodeActions := DefaultUnicodeActions := unicodeActions.
   238      Scanner setupActions
   240      Scanner setupActions
   239      Scanner withAllSubclassesDo:[:cls | cls setupActions ]
   241      Scanner withAllSubclassesDo:[:cls | cls setupActions ]
   240     "
   242     "
   241 
   243 
   242     "Modified: / 02-07-2017 / 01:11:27 / cg"
   244     "Modified: / 02-07-2017 / 01:11:27 / cg"
       
   245     "Modified: / 07-08-2018 / 07:47:38 / Claus Gittinger"
   243 ! !
   246 ! !
   244 
   247 
   245 !Scanner class methodsFor:'instance creation'!
   248 !Scanner class methodsFor:'instance creation'!
   246 
   249 
   247 for:aStringOrStream
   250 for:aStringOrStream
  2172 !
  2175 !
  2173 
  2176 
  2174 setSource:newSource
  2177 setSource:newSource
  2175     source := newSource
  2178     source := newSource
  2176     "plain setter: only stores newSource in the source instance variable; nothing else is touched here. NOTE(review): presumably the low-level counterpart of #source: - confirm"
  2179     "plain setter: only stores newSource in the source instance variable; nothing else is touched here. NOTE(review): presumably the low-level counterpart of #source: - confirm"
       
  2180     "Modified: / 07-08-2018 / 07:46:57 / Claus Gittinger"
  2177 !
  2181 !
  2178 
  2182 
  2179 source:aStringOrStream
  2183 source:aStringOrStream
  2180     "prepare for reading from aStringOrStream;
  2184     "prepare for reading from aStringOrStream;
  2181      notice: if token is nonNil, it is preserved. This allows for scanning
  2185      notice: if token is nonNil, it is preserved. This allows for scanning
  2463     "return a left-arrow"
  2467     "return a left-arrow"
  2464 
  2468 
  2465     ^ self nextToken:$_
  2469     ^ self nextToken:$_
  2466 
  2470 
  2467     "Created: / 25-03-2011 / 13:58:50 / cg"
  2471     "Created: / 25-03-2011 / 13:58:50 / cg"
       
  2472 !
       
  2473 
       
  2474 nextBacktickIdentifier
       
  2475     "a single back-quote has been scanned; 
       
  2476      scan up to the closing back-tick (via #nextString:) and return the text as an #Identifier token"
       
  2477     "on unterminated input #nextString: answers #EOF with token set to nil; pass that on instead of re-tagging a stale tokenValue"
       
  2478     (self nextString:$`) == #EOF ifTrue:[^ tokenType].
       
  2479 
       
  2480     tokenName := tokenValue.
       
  2481     tokenType := #Identifier.
       
  2482     ^ tokenType
       
  2483 
       
  2484     "Created: / 07-08-2018 / 07:37:51 / Claus Gittinger"
  2468 !
  2485 !
  2469 
  2486 
  2470 nextCharacter
  2487 nextCharacter
  2471     "a $ has been read - return a character token"
  2488     "a $ has been read - return a character token"
  2472 
  2489 
  3223 
  3240 
  3224     "Modified: / 12-02-2017 / 11:02:51 / cg"
  3241     "Modified: / 12-02-2017 / 11:02:51 / cg"
  3225 !
  3242 !
  3226 
  3243 
  3227 nextString:delimiter
  3244 nextString:delimiter
  3228     "a single quote has been scanned; scan the string (caring for doubled quotes"
  3245     "a quote has been scanned; scan the string (caring for doubled quotes)"
  3229 
  3246 
  3230     |nextChar string pos
  3247     |nextChar string pos
  3231      index "{ Class: SmallInteger }"
  3248      index "{ Class: SmallInteger }"
  3232      len   "{ Class: SmallInteger }"
  3249      len   "{ Class: SmallInteger }"
  3233      inString peekChar|
  3250      inString peekChar|
  3239     source next.
  3256     source next.
  3240     nextChar := source next.
  3257     nextChar := source next.
  3241     inString := true.
  3258     inString := true.
  3242 
  3259 
  3243     [inString] whileTrue:[
  3260     [inString] whileTrue:[
  3244 	nextChar isNil ifTrue:[
  3261         nextChar isNil ifTrue:[
  3245 	    self syntaxError:'unexpected end-of-input in String'
  3262             self syntaxError:'unexpected end-of-input in String'
  3246 		    position:pos to:(source position).
  3263                     position:pos to:(source position).
  3247 	    self markStringFrom:pos to:source position.
  3264             self markStringFrom:pos to:source position.
  3248 	    token := nil.
  3265             token := nil.
  3249 	    tokenType := #EOF.
  3266             tokenType := #EOF.
  3250 	    ^ tokenType
  3267             ^ tokenType
  3251 	].
  3268         ].
  3252 	(nextChar == Character cr) ifTrue:[
  3269         (nextChar == Character cr) ifTrue:[
  3253 	    lineNr := lineNr + 1
  3270             lineNr := lineNr + 1
  3254 	] ifFalse:[
  3271         ] ifFalse:[
  3255 	    (nextChar == delimiter) ifTrue:[
  3272             (nextChar == delimiter) ifTrue:[
  3256 		(source peekOrNil == delimiter) ifTrue:[
  3273                 (source peekOrNil == delimiter) ifTrue:[
  3257 		    source next
  3274                     source next
  3258 		] ifFalse:[
  3275                 ] ifFalse:[
  3259 		    inString := false
  3276                     inString := false
  3260 		]
  3277                 ]
  3261 	    ] ifFalse:[
  3278             ] ifFalse:[
  3262 		parserFlags allowExtendedSTXSyntax == true ifTrue:[
  3279                 parserFlags allowExtendedSTXSyntax == true ifTrue:[
  3263 		    (nextChar == $\) ifTrue:[
  3280                     (nextChar == $\) ifTrue:[
  3264 			peekChar := source peekOrNil.
  3281                         peekChar := source peekOrNil.
  3265 			peekChar notNil ifTrue:[
  3282                         peekChar notNil ifTrue:[
  3266 			    source next.
  3283                             source next.
  3267 			    nextChar := self escapeCharacterFor:peekChar.
  3284                             nextChar := self escapeCharacterFor:peekChar.
  3268 			]
  3285                         ]
  3269 		    ]
  3286                     ]
  3270 		]
  3287                 ]
  3271 	    ].
  3288             ].
  3272 	].
  3289         ].
  3273 	inString ifTrue:[
  3290         inString ifTrue:[
  3274 	    nextChar notNil ifTrue:[
  3291             nextChar notNil ifTrue:[
  3275 		nextChar codePoint > 255 ifTrue:[
  3292                 nextChar codePoint > 255 ifTrue:[
  3276 		    string bitsPerCharacter < nextChar bitsPerCharacter ifTrue:[
  3293                     string bitsPerCharacter < nextChar bitsPerCharacter ifTrue:[
  3277 			nextChar codePoint > 16rFFFF ifTrue:[
  3294                         nextChar codePoint > 16rFFFF ifTrue:[
  3278 			    string := string asUnicode32String
  3295                             string := string asUnicode32String
  3279 			] ifFalse:[
  3296                         ] ifFalse:[
  3280 			    string := string asUnicode16String.
  3297                             string := string asUnicode16String.
  3281 			].
  3298                         ].
  3282 		    ].
  3299                     ].
  3283 		].
  3300                 ].
  3284 		string at:index put:nextChar.
  3301                 string at:index put:nextChar.
  3285 		(index == len) ifTrue:[
  3302                 (index == len) ifTrue:[
  3286 		    string := string , (string species new:len).
  3303                     string := string , (string species new:len).
  3287 		    len := len * 2
  3304                     len := len * 2
  3288 		].
  3305                 ].
  3289 		index := index + 1.
  3306                 index := index + 1.
  3290 	    ].
  3307             ].
  3291 	    nextChar := source next
  3308             nextChar := source next
  3292 	]
  3309         ]
  3293     ].
  3310     ].
  3294 
  3311 
  3295     tokenValue := token := string copyTo:(index - 1).
  3312     tokenValue := token := string copyTo:(index - 1).
  3296     tokenType := #String.
  3313     tokenType := #String.
  3297     ^ tokenType
  3314     ^ tokenType
  3298 
  3315 
  3299     "Created: / 01-08-2006 / 14:56:07 / cg"
  3316     "Created: / 01-08-2006 / 14:56:07 / cg"
  3300     "Modified: / 22-08-2006 / 14:10:26 / cg"
  3317     "Modified: / 22-08-2006 / 14:10:26 / cg"
       
  3318     "Modified (comment): / 07-08-2018 / 07:40:23 / Claus Gittinger"
  3301 !
  3319 !
  3302 
  3320 
  3303 nextSymbolAfterHash
  3321 nextSymbolAfterHash
  3304     "helper: a # has been read - return #Symbol token or nil"
  3322     "helper: a # has been read - return #Symbol token or nil"
  3305 
  3323