|
1 'From Smalltalk/X, Version:2.10.8 on 1-feb-1996 at 6:09:27 pm' ! |
|
2 |
|
"Instance state, as used by the methods in this file:
    source                  the stream from which characters are read
    tokenType               type of the most recently scanned token
    tokenName               name of the last #Identifier token
    tokenValue              value of the last #Integer or #String token
    tokenRadix              radix of the last #Integer token
    tokenPosition           source position recorded before a token is dispatched
    tokenLineNr             running line counter, starts at 1
    hereChar                the character currently being looked at
    peekChar peekChar2      lookahead characters consumed by nextToken; no method
                            visible here ever assigns them a non-nil value
    beginCommentCharacter endCommentCharacter eolCommentCharacter
                            optional comment delimiters (nil means: none)
    eolCharacter            the character treated as end of line
    outStream outCol        optional echo stream and its column counter
    actions types           per-character action and type tables
    eolIsSignificant        if true, nextToken answers #EOL at line ends
 The variable named token is declared but not referenced by any method visible here.
 Class variables DefaultActions and DefaultTypes hold the tables built by the
 class-side initialize."

ReadStream subclass:#TokenizedStream
    instanceVariableNames:'source token tokenType tokenPosition tokenName tokenLineNr
                tokenValue tokenRadix hereChar peekChar peekChar2
                beginCommentCharacter endCommentCharacter eolCommentCharacter
                eolCharacter outStream outCol actions types eolIsSignificant'
    classVariableNames:'DefaultActions DefaultTypes'
    poolDictionaries:''
    category:'Streams'
!
|
12 |
|
13 !TokenizedStream class methodsFor:'documentation'! |
|
14 |
|
15 documentation |
|
16 " |
|
17 a first version of a tokenStream. |
|
18 This is still being constructed and will finally help a simplified |
|
19 Scanner class. |
|
20 For now, it may be useful when textual input files are to be read and |
|
21 parsed. For example, ascii data files are often in a simple free form format |
|
22 which requires some little processing. |
|
23 |
|
24 operation: |
|
25 |
|
26 a TokenizedStream reads characters from its real input stream |
|
27 and dispatches to a token reading method with the help of an actionArray, |
|
28 which is indexed by the character's ascii code. |
|
29 By default, the table is setup to only read numbers (integers) |
|
30 and identifiers. Whitespace is ignored, and all other characters return |
|
31 themselves. |
|
32 |
|
33 The returned tokens are either symbols (#Identifier / #Integer) or |
|
34 characters ($+ $, etc.) |
|
35 If it's an Identifier, the name is found in tokenName (there is an access method for that). |
|
36 If it's an Integer, the value is found in tokenValue. |
|
37 |
|
38 EndOfLine is returned as #EOL; end of input as #EOF. |
|
39 Unrecognized input causes #Error to be returned. |
|
40 " |
|
41 ! |
|
42 |
|
43 examples |
|
44 " |
|
45 simple example: |
|
46 |
|
47 |s| |
|
48 |
|
49 s := TokenizedStream on:'hello world, how much is 3 + 2'. |
|
50 [s atEnd] whileFalse:[ |
|
51 Transcript showCr:(s next). |
|
52 ]. |
|
53 |
|
54 |
|
55 simple example2: |
|
56 |
|
57 |s token| |
|
58 |
|
59 s := TokenizedStream on:'foo bar baz 3 + 2'. |
|
60 [s atEnd] whileFalse:[ |
|
61 token := s next. |
|
62 token == #Identifier ifTrue:[ |
|
63 Transcript showCr:(token , ' name=' , s tokenName). |
|
64 ] ifFalse:[ |
|
65 token == #Integer ifTrue:[ |
|
66 Transcript showCr:(token , ' value=' , s tokenValue printString). |
|
67 ] ifFalse:[ |
|
68 Transcript showCr:token. |
|
69 ] |
|
70 ] |
|
71 ]. |
|
72 |
|
73 |
|
74 reading expressions: |
|
75 |
|
76 |s num1 num2| |
|
77 |
|
78 s := TokenizedStream on:' |
|
79 3 + 2 |
|
80 4 + 6 |
|
81 1 + 2 |
|
82 '. |
|
83 [s atEnd] whileFalse:[ |
|
84 s next == #Integer ifTrue:[ |
|
85 num1 := s tokenValue. |
|
86 s next == $+ ifTrue:[ |
|
87 s next == #Integer ifTrue:[ |
|
88 num2 := s tokenValue. |
|
89 Transcript showCr:num1 printString |
|
90 , ' + ' |
|
91 , num2 printString |
|
92 , ' => ' |
|
93 , (num1 + num2) printString. |
|
94 ] |
|
95 ] |
|
96 ] |
|
97 ]. |
|
98 |
|
99 |
|
100 with eol-comments: |
|
101 |
|
102 |s num1 num2| |
|
103 |
|
104 s := TokenizedStream on:' |
|
105 3 + 2 |
|
106 ; this is a comment |
|
107 4 + 6 |
|
108 1 + 2 |
|
109 '. |
|
110 s eolCommentCharacter:$;. |
|
111 |
|
112 [s atEnd] whileFalse:[ |
|
113 s next == #Integer ifTrue:[ |
|
114 num1 := s tokenValue. |
|
115 s next == $+ ifTrue:[ |
|
116 s next == #Integer ifTrue:[ |
|
117 num2 := s tokenValue. |
|
118 Transcript showCr:num1 printString |
|
119 , ' + ' |
|
120 , num2 printString |
|
121 , ' => ' |
|
122 , (num1 + num2) printString. |
|
123 ] |
|
124 ] |
|
125 ] |
|
126 ]. |
|
127 |
|
128 |
|
129 scan /etc/services file: |
|
130 |
|
131 |s t service port protocol| |
|
132 |
|
133 s := TokenizedStream on:'/etc/services' asFilename readStream. |
|
134 s eolCommentCharacter:$#. |
|
135 s typeTable at:($- asciiValue) put:#letter. |
|
136 |
|
137 [s atEnd] whileFalse:[ |
|
138 t := s next. |
|
139 t == #Identifier ifTrue:[ |
|
140 service := s tokenName. |
|
141 t := s next. |
|
142 t == #Integer ifTrue:[ |
|
143 port := s tokenValue. |
|
144 s next == $/ ifTrue:[ |
|
145 t := s next. |
|
146 t == #Identifier ifTrue:[ |
|
147 protocol := s tokenName. |
|
148 Transcript showCr:(service , ' is ' , protocol , ' port=' , port printString). |
|
149 ] |
|
150 ] |
|
151 ] |
|
152 ]. |
|
153 s skipToEol |
|
154 ] |
|
155 " |
|
156 ! ! |
|
157 |
|
158 !TokenizedStream class methodsFor:'initialization'! |
|
159 |
|
initialize
    "build the class-wide default tables. Both are indexed by a character's
     ascii code. DefaultTypes maps a code to #digit, #letter or - for all other
     codes from 2 to 255 - to the character itself. DefaultActions holds a
     token reading block for digits and letters and nil for everything else."

    |readInteger readIdentifier|

    DefaultTypes := Array new:256.
    DefaultActions := Array new:256.

    "start out with every character being its own type;
     digits and letters are overwritten below"
    (2 to:255) do:[:charCode |
        DefaultTypes at:charCode put:(Character value:charCode)
    ].

    readInteger := [:stream :ch | stream nextInteger].
    $0 asciiValue to:$9 asciiValue do:[:charCode |
        DefaultActions at:charCode put:readInteger.
        DefaultTypes at:charCode put:#digit
    ].

    "lower- and uppercase letters share one action block"
    readIdentifier := [:stream :ch | stream nextIdentifier].
    #( #($a $z) #($A $Z) ) do:[:bounds |
        bounds first asciiValue to:bounds last asciiValue do:[:charCode |
            DefaultActions at:charCode put:readIdentifier.
            DefaultTypes at:charCode put:#letter
        ]
    ].

    "
     TokenizedStream initialize
    "
! !
|
191 |
|
192 !TokenizedStream class methodsFor:'instance creation'! |
|
193 |
|
on:aStream
    "return a new TokenizedStream reading from aStream.
     The instance side on: initializes the new stream and also accepts a
     non-stream argument, which it wraps in a ReadStream."

    ^ self basicNew on:aStream
! !
|
197 |
|
198 !TokenizedStream methodsFor:'accessing'! |
|
199 |
|
actionTable
    "return the action table. It is indexed by a character's ascii code;
     a non-nil entry is a two-argument block which nextToken evaluates with
     the stream and the character in order to scan a token."

    ^ actions

    "Created: 1.2.1996 / 17:42:00 / cg"
!
|
205 |
|
beginCommentCharacter:aCharacter
    "set the character which starts a delimited comment.
     It is nil after initialization, in which case nextToken never
     recognizes such a comment."

    beginCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:38:01 / cg"
!
|
211 |
|
endCommentCharacter:aCharacter
    "set the character at which skipComment stops, that is, the character
     which ends a comment started by the beginCommentCharacter"

    endCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:38:06 / cg"
!
|
217 |
|
eolCommentCharacter:aCharacter
    "set the character which starts a comment that extends up to the end
     of the line. It is nil after initialization (no such comments)."

    eolCommentCharacter := aCharacter

    "Created: 1.2.1996 / 17:37:51 / cg"
!
|
223 |
|
tokenName
    "return the name of the most recently scanned identifier,
     as stored by nextIdentifier"

    ^ tokenName

    "Created: 1.2.1996 / 17:46:48 / cg"
!
|
229 |
|
tokenType
    "return the type of the most recently scanned token; this is the same
     object which the scanning methods answer (a symbol such as #Identifier,
     #Integer, #EOF or #Error, or an entry of the type table)"

    ^ tokenType

    "Created: 1.2.1996 / 17:26:24 / cg"
!
|
235 |
|
tokenValue
    "return the value of the most recently scanned token: the number read
     by nextInteger or the string collected by nextString:"

    ^ tokenValue

    "Created: 1.2.1996 / 17:26:30 / cg"
!
|
241 |
|
typeTable
    "return the type table. It is indexed by a character's ascii code and
     is consulted by nextToken (for characters without an action) and by
     nextIdentifier (entries #letter and #digit continue an identifier)."

    ^ types

    "Created: 1.2.1996 / 17:41:54 / cg"
! !
|
247 |
|
248 !TokenizedStream methodsFor:'initialization'! |
|
249 |
|
initialize
    "set up the scanner state: line counting starts at 1, no comment
     characters are defined, lines end with a cr which is not reported as
     a token, and the action and type tables start out as the class defaults."

    tokenLineNr := 1.
    eolCommentCharacter := beginCommentCharacter := endCommentCharacter := nil.
    eolCharacter := Character cr.
    eolIsSignificant := false.

    "each stream works on its own copies of the tables. actionTable and
     typeTable hand them out to be modified (see the /etc/services example);
     storing into the class-wide defaults themselves would silently change
     the behavior of every other TokenizedStream as well."
    actions := DefaultActions copy.
    types := DefaultTypes copy.

    "Modified: 1.2.1996 / 17:36:56 / cg"
! !
|
261 |
|
262 !TokenizedStream methodsFor:'private'! |
|
263 |
|
on:aStringOrStream
    "initialize the receiver and make it read from aStringOrStream.
     A stream is used as it is; anything else is wrapped in a ReadStream."

    self initialize.

    source := aStringOrStream isStream
                ifTrue:[aStringOrStream]
                ifFalse:[ReadStream on:aStringOrStream].

    "Created: 1.2.1996 / 16:18:34 / cg"
    "Modified: 1.2.1996 / 16:18:47 / cg"
! !
|
276 |
|
277 !TokenizedStream methodsFor:'reading'! |
|
278 |
|
next
    "return the next token; this simply forwards to nextToken"

    ^ self nextToken

    "Created: 1.2.1996 / 17:21:47 / cg"
!
|
284 |
|
nextIdentifier
    "scan an identifier: collect characters from the source for as long as
     the type table classifies them as #letter or #digit. The collected name
     is left in tokenName; the token type #Identifier is stored and returned.
     The first character that does not belong to the identifier stays
     unread in the source."

    |ch kind collected|

    collected := WriteStream on:(String new:20).
    ch := source peek.

    [
        ch notNil and:[
            kind := types at:ch asciiValue.
            kind == #letter or:[kind == #digit]
        ]
    ] whileTrue:[
        collected nextPut:ch.
        ch := source nextPeek
    ].

    tokenName := collected contents.
    tokenType := #Identifier.
    ^ tokenType

    "Created: 1.2.1996 / 16:35:53 / cg"
    "Modified: 1.2.1996 / 17:51:59 / cg"
!
|
323 |
|
nextInteger
    "scan a decimal integer from the source. The number is left in
     tokenValue and its radix (always 10) in tokenRadix; the token type
     #Integer is stored and returned."

    tokenRadix := 10.
    tokenValue := Integer readFrom:source radix:tokenRadix.
    ^ tokenType := #Integer

    "Created: 1.2.1996 / 16:37:03 / cg"
    "Modified: 1.2.1996 / 16:37:28 / cg"
!
|
333 |
|
nextString:separator
    "scan a string which is delimited by the separator character.
     The source must be positioned at the opening separator. Within the
     string, two separators in a row stand for a single separator character.
     The string's contents are left in tokenValue; the token type #String is
     stored and returned.
     If the input ends before the closing separator is seen, an error is
     signalled and - if the error returns - the token type becomes #EOF."

    |nextChar collected inString|

    collected := WriteStream on:(String new:20).
    source next.        "skip the opening separator"

    inString := true.
    [inString] whileTrue:[
        nextChar := source next.
        nextChar isNil ifTrue:[
            self error:'unexpected end-of-input in String'.
            tokenType := #EOF.
            ^ tokenType
        ].
        "count lines with the same end-of-line character as the rest of the scanner"
        nextChar == eolCharacter ifTrue:[
            tokenLineNr := tokenLineNr + 1
        ].
        (nextChar == separator and:[source peek ~~ separator]) ifTrue:[
            "a single separator closes the string"
            inString := false
        ] ifFalse:[
            "of a doubled separator, drop the second one and keep the first"
            nextChar == separator ifTrue:[
                source next
            ].
            collected nextPut:nextChar
        ]
    ].

    tokenValue := collected contents.
    tokenType := #String.
    ^ tokenType

    "Created: 1.2.1996 / 16:39:48 / cg"
!
|
380 |
|
nextToken
    "return the next token from the source-stream.
     Blanks, line ends (unless eolIsSignificant) and comments are skipped first.
     The answer is then one of:
        #EOF    at the end of the input
        #EOL    at a line end, if eolIsSignificant is true
        the value of the action block registered for the character, if any
        the character's entry in the type table otherwise
        #Error  if neither table provides an entry"

    |skipping actionBlock|

    peekChar notNil ifTrue:[
        "a lookahead character is pending: take it instead of reading the source"
        hereChar := peekChar.
        peekChar := peekChar2.
        peekChar2 := nil
    ] ifFalse:[
        skipping := true.
        [skipping] whileTrue:[
            outStream notNil ifTrue:[
                "when echoing, skip space characters one by one so that
                 each of them can be copied to outStream"
                [(hereChar := source peek) == Character space] whileTrue:[
                    source next.
                    outStream space.
                    outCol := outCol + 1.
                ]
            ] ifFalse:[
                "NOTE(review): skipSeparatorsExceptCR is defined by the source stream,
                 not here; presumably it skips whitespace other than line ends and
                 answers the following character without consuming it - confirm"
                hereChar := source skipSeparatorsExceptCR.
            ].
            hereChar isNil ifTrue:[
                tokenType := #EOF.
                ^ tokenType
            ].
            hereChar == eolCharacter ifTrue:[
                "consume the line end and keep the line counter up to date"
                tokenLineNr := tokenLineNr + 1.
                source next.
                outStream notNil ifTrue:[
                    outStream cr.
                    outCol := 1
                ].
                eolIsSignificant ifTrue:[
                    tokenType := #EOL.
                    ^ tokenType
                ]
            ] ifFalse:[
                hereChar == beginCommentCharacter ifTrue:[
                    "start of a comment"

                    self skipComment.
                    hereChar := source peek.
                ] ifFalse:[
                    hereChar == eolCommentCharacter ifTrue:[
                        "start of an eol comment"

                        self skipEolComment.
                        hereChar := source peek.
                    ] ifFalse:[
                        "neither blank, line end nor comment: a token starts here"
                        skipping := false
                    ]
                ]
            ]
        ].
        hereChar isNil ifTrue:[
            tokenType := #EOF.
            ^ tokenType
        ]
    ].
    tokenPosition := source position.

    "first choice: an action block registered for this character.
     hereChar has only been peeked at, so the block reads the token
     from the source itself"
    actions notNil ifTrue:[
        actionBlock := actions at:(hereChar asciiValue).
        actionBlock notNil ifTrue:[
            ^ actionBlock value:self value:hereChar
        ]
    ].

    "second choice: consume the character and answer its type table entry"
    types notNil ifTrue:[
        source next.
        tokenType := types at:(hereChar asciiValue).
        tokenType notNil ifTrue:[
            ^ tokenType
        ]
    ].

    tokenType := #Error.
    ^ #Error

    "Modified: 1.2.1996 / 17:39:20 / cg"
!
|
462 |
|
skipComment
    "skip a delimited comment. The source must be positioned at the
     beginCommentCharacter. Everything up to and including the
     endCommentCharacter is consumed; if the input ends before that, the
     rest of the input is consumed. Line ends inside the comment are counted.
     If there is an outStream, the comment's text (without its delimiters)
     is echoed to it. On return, hereChar is the character following the
     comment, or nil at the end of the input."

    source next.        "skip the beginCommentCharacter"
    hereChar := source peek.

    [hereChar notNil and:[hereChar ~~ endCommentCharacter]] whileTrue:[
        hereChar == eolCharacter ifTrue:[
            tokenLineNr := tokenLineNr + 1.
        ].
        outStream notNil ifTrue:[
            outStream nextPut:hereChar.
            outCol := outCol + 1
        ].
        hereChar := source nextPeek
    ].

    "the loop stops in front of the endCommentCharacter. It has to be
     consumed here; otherwise nextToken would return it as a token of its
     own or - if it is the same character as the beginCommentCharacter -
     take it for the start of another comment."
    hereChar notNil ifTrue:[
        hereChar := source nextPeek
    ].

    "Created: 1.2.1996 / 17:35:24 / cg"
    "Modified: 1.2.1996 / 17:37:21 / cg"
!
|
481 |
|
skipEolComment
    "skip an end-of-line comment: consume the character the source is
     positioned at (nextToken calls this at the eolCommentCharacter) and
     then skip the rest of the line via skipToEol"

    source next.
    self skipToEol

    "Created: 1.2.1996 / 17:34:17 / cg"
    "Modified: 1.2.1996 / 18:06:33 / cg"
!
|
489 |
|
skipToEol
    "skip the remaining characters of the current line, echoing them to
     outStream if there is one. The eolCharacter itself is not consumed;
     on return, hereChar is that eolCharacter or nil at the end of the input."

    hereChar := source peek.

    [hereChar notNil and:[hereChar ~~ eolCharacter]] whileTrue:[
        outStream notNil ifTrue:[
            outStream nextPut:hereChar.
            outCol := outCol + 1
        ].
        hereChar := source nextPeek.
    ].

    "tokenLineNr is not advanced here: the eolCharacter is still unread, and
     nextToken counts the line when it consumes that character. Counting
     here as well made every skipped line count twice."

    "Created: 1.2.1996 / 18:06:09 / cg"
    "Modified: 1.2.1996 / 18:06:36 / cg"
! !
|
505 |
|
506 !TokenizedStream methodsFor:'testing'! |
|
507 |
|
atEnd
    "return true if no more tokens can be read: either the source is
     exhausted, or the last token read was #Error or #EOF"

    source atEnd ifTrue:[^ true].
    tokenType == #Error ifTrue:[^ true].
    ^ tokenType == #EOF

    "Created: 1.2.1996 / 17:21:28 / cg"
    "Modified: 1.2.1996 / 17:30:25 / cg"
! !
|
514 |
|
515 !TokenizedStream class methodsFor:'documentation'! |
|
516 |
|
version
    "return the revision control (CVS) header string of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic2/TokenizedStream.st,v 1.1 1996-02-01 17:10:11 cg Exp $'
! !
|
520 TokenizedStream initialize! |