author | Stefan Vogel <sv@exept.de> |
Mon, 13 Mar 2017 17:15:53 +0100 | |
changeset 21642 | b5c258f019ae |
parent 21522 | c4fe9f268c0b |
child 22415 | 769a6b3f0a19 |
permissions | -rw-r--r-- |
8148 | 1 |
" |
2 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
3 |
All Rights Reserved |
|
4 |
||
5 |
This software is furnished under a license and may be used |
|
6 |
only in accordance with the terms of that license and with the |
|
7 |
inclusion of the above copyright notice. This software may not |
|
8 |
be provided or otherwise made available to, or used by, any |
|
9 |
other person. No title to or ownership of the software is |
|
10 |
hereby transferred. |
|
11 |
" |
|
8047 | 12 |
"{ Package: 'stx:libbasic' }" |
13 |
||
17331 | 14 |
"{ NameSpace: Smalltalk }" |
15 |
||
8447
9bdea3431846
EncodedStream inherts from PeekableStream
Stefan Vogel <sv@exept.de>
parents:
8444
diff
changeset
|
16 |
PeekableStream subclass:#EncodedStream
    instanceVariableNames:'encoder stream'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text-Encodings'
!
22 |
||
8148 | 23 |
!EncodedStream class methodsFor:'documentation'! |
24 |
||
25 |
copyright |
|
26 |
" |
|
27 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
28 |
All Rights Reserved |
|
29 |
||
30 |
This software is furnished under a license and may be used |
|
31 |
only in accordance with the terms of that license and with the |
|
32 |
inclusion of the above copyright notice. This software may not |
|
33 |
be provided or otherwise made available to, or used by, any |
|
34 |
other person. No title to or ownership of the software is |
|
35 |
hereby transferred. |
|
36 |
" |
|
15049 | 37 |
! |
38 |
||
39 |
documentation
"
    A stream wrapper which transparently decodes characters from an external
    encoding while reading (and encodes them while writing), by passing all
    data through an encoder object.
    When used for chunk reading, it looks for '{ Encoding: xxx' near the
    beginning of the file, to find out which encoder is to be used.

    Especially targeted towards reading ST/X source files.
"
! !
8047 | 47 |
|
48 |
!EncodedStream class methodsFor:'instance creation'! |
|
49 |
||
50 |
stream:streamArg encoder:encoder
    "create an instance which reads from / writes to streamArg,
     passing all data through the given encoder.
     Answers whatever the instance's #encoder: setter returns
     (which is the new instance itself)."

    |newStream|

    newStream := self basicNew.
    newStream stream:streamArg.
    ^ newStream encoder:encoder

    "
     |s|
     s := EncodedStream stream:Transcript encoder:(CharacterEncoder encoderToEncodeFrom:#utf8 into:#unicode).
     s nextPutAll:('öäü' utf8Encoded)
    "
!
59 |
||
60 |
stream:streamArg encoding:encodingSymbol
    "create an instance on streamArg, using the encoder which
     CharacterEncoder provides for encodingSymbol (e.g. #utf8)"

    |encoderForSymbol|

    encoderForSymbol := CharacterEncoder encoderFor:encodingSymbol.
    ^ self stream:streamArg encoder:encoderForSymbol

    "
     |baseStream s|
     baseStream := '' readWriteStream.
     s := EncodedStream stream:baseStream encoding:#utf8.
     s nextPutAll:'öäü'.
     baseStream reset; contents.
    "
! !
71 |
||
17650 | 72 |
!EncodedStream class methodsFor:'Compatibility-VW5.4'! |
73 |
||
74 |
on:aStream encodedBy:aStreamEncoder
    "VisualWorks compatible instance creation:
     create an instance and initialize it on aStream with aStreamEncoder"

    |inst|

    inst := self basicNew.
    ^ inst on:aStream encodedBy:aStreamEncoder
! !
|
78 |
||
12610 | 79 |
!EncodedStream class methodsFor:'utilities'! |
80 |
||
81 |
decodedStreamFor:aStream
    "given a stream, return an appropriate encoded stream on it,
     which does the decoding on the fly.
     If the input stream is positionable, its encoding is guessed by
     CharacterEncoder (by reading the first few lines, looking for a
     string with an encoding hint).
     Otherwise, the returned stream starts with the null encoder and
     switches to a real encoder later (see #nextChunk).
     Used mostly to read UTF8 files (source code)"

    aStream inputStream isPositionable ifFalse:[
        "/ setup for no-encoding;
        "/ switch to a real encoder later,
        "/ whenever an encoding pragma is encountered later by #nextChunk.
        ^ self stream:aStream encoder:CharacterEncoder nullEncoderInstance
    ].

    "JV@2012-03-27: do NOT change the position here (i.e. do not send
     #skipEncodingChunk to the new stream) - the caller might be interested in all data"
    ^ self
        stream:aStream
        encoding:(CharacterEncoder guessEncodingOfStream:aStream inputStream)

    "Modified: / 23-08-2013 / 17:30:58 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
105 |
||
106 |
encoderFor:anEncodingSymbol
    "return the encoder which CharacterEncoder provides for anEncodingSymbol.
     Answers nil if anEncodingSymbol is nil, #'iso8859-1' or #'ascii';
     the sender (#nextChunk) keeps its current encoder in that case."

    anEncodingSymbol isNil ifTrue:[^ nil].
    anEncodingSymbol = #'iso8859-1' ifTrue:[^ nil].
    anEncodingSymbol = #'ascii' ifTrue:[^ nil].

    ^ CharacterEncoder encoderFor:anEncodingSymbol
! !
114 |
||
17650 | 115 |
!EncodedStream methodsFor:'Compatibility-VW5.4'! |
116 |
||
117 |
on:aStream encodedBy:aStreamEncoder
    "Initialize the receiver on aStream with aStreamEncoder.
     Only the stream and the encoder are remembered; the remaining parts of the
     VisualWorks implementation are kept below as a reference, but are disabled."

"/    skipSize := PositionRecord new.
"/    aStreamEncoder skipRecord: skipSize.
"/    binary := false.
"/    lineEndConvention == nil
"/        ifTrue:
"/            [aStream needsFileLineEndConversion
"/                ifTrue: [self lineEndConvention: IOAccessor defaultLineEndConvention]
"/                ifFalse: [self lineEndConvention: LineEndTransparent]].

    self
        stream:aStream;
        encoder:aStreamEncoder
! !
|
131 |
||
8047 | 132 |
!EncodedStream methodsFor:'accessing'! |
133 |
||
11992 | 134 |
contentsSpecies
    "answer the contentsSpecies of the underlying stream.
     Remark of the original author: not sure if this is ok
     (answering UnicodeString was considered as an alternative)"

    ^ stream contentsSpecies

    "Created: / 14-06-2005 / 17:11:01 / janfrog"
!
|
144 |
||
8047 | 145 |
encoder
    "return the encoder, through which all data is passed"

    ^ encoder
!
|
148 |
||
149 |
encoder:something
    "set the encoder, through which all data is passed"

    encoder := something
!
|
152 |
||
21642 | 153 |
encoding
    "return the name of the encoding, as reported by the encoder"

    ^ encoder nameOfEncoding

    "Created: / 13-03-2017 / 11:17:56 / stefan"
!
|
158 |
||
15593 | 159 |
inputStream
    "return the inputStream of the underlying stream"

    ^ stream inputStream
!
|
162 |
||
163 |
lineNumber
    "return the line number, as reported by the underlying stream"

    ^ stream lineNumber
!
|
166 |
||
12646 | 167 |
pathName
    "return the pathName of the underlying stream;
     nil, if there is no underlying stream"

    stream notNil ifTrue:[
        ^ stream pathName
    ].
    ^ nil
!
|
175 |
||
11766 | 176 |
readStream
    "answer the receiver itself - reading is done from self"

    ^ self

    "Modified (comment): / 16-02-2017 / 15:59:52 / stefan"
!
183 |
||
8047 | 184 |
stream
    "return the underlying stream"

    ^ stream
!
|
187 |
||
188 |
stream:something
    "set the underlying stream"

    stream := something
! !
|
191 |
||
21472 | 192 |
!EncodedStream methodsFor:'chunk input/output'! |
15963 | 193 |
|
21472 | 194 |
nextChunk
    "read the next chunk from the underlying stream and return it,
     decoded by the encoder. An empty or nil chunk is returned as is.
     As a side effect, check for an encoding chunk: as long as the null encoder
     is in use on a non-positionable stream, a chunk containing an encoding
     pragma may switch the receiver to a real encoder (which is then
     already used to decode this very chunk)."

    |rawChunk|

    rawChunk := stream nextChunk.
    rawChunk isEmptyOrNil ifTrue:[
        ^ rawChunk
    ].

    (encoder isNullEncoder and:[stream isPositionable not]) ifTrue:[
        "/ not already checked
        "/ check if we need lazy setup of the encoder
        "/ (used with non-positionable streams)
        (rawChunk includesString:'{ Encoding:') ifTrue:[
            |detectedEncoder|

            detectedEncoder := self class encoderFor:(CharacterEncoder guessEncodingOfBuffer:rawChunk).
            detectedEncoder notNil ifTrue:[
                encoder := detectedEncoder.
            ].
        ]
    ].
    ^ encoder decodeString:rawChunk

    "Modified: / 16-02-2017 / 14:54:57 / stefan"
! !
222 |
||
21642 | 223 |
!EncodedStream methodsFor:'queries'! |
224 |
||
225 |
isEncoderFor:encodingString
    "forward the query to the encoder and return its answer"

    ^ encoder isEncoderFor:encodingString

    "Created: / 13-03-2017 / 11:20:20 / stefan"
! !
|
230 |
||
8047 | 231 |
!EncodedStream methodsFor:'stream protocol'! |
232 |
||
8175 | 233 |
atEnd
    "return true, if the underlying stream is at its end"

    ^ stream atEnd
!
|
236 |
||
8165 | 237 |
close
    "close the underlying stream.
     Does nothing if there is no underlying stream; this is consistent with
     #isOpen and #pathName, which also tolerate a nil stream
     (previously, closing such an instance sent #close to nil)."

    stream notNil ifTrue:[
        stream close
    ].
!
|
240 |
||
21521 | 241 |
collection
    "return the underlying container; nil, if there is none (eg. external streams).
     Always nil here, because the underlying collection (if any)
     is useless to the outside world"

    ^ nil

    "Created: / 22-02-2017 / 11:15:00 / cg"
!
|
249 |
||
13400 | 250 |
contents
    "read up to the end and return the decoded characters,
     collected via String streamContents:"

    ^ String streamContents:[:out |
        [self atEnd] whileFalse:[
            |decodedChar|

            decodedChar := self next.
            "/ decoder may decide to return nil from #next, even though the
            "/ underlying stream was not at the end before. This is probably a bug...
            decodedChar isNil ifFalse:[
                out nextPut:decodedChar
            ]
        ]
    ]

    "Created: / 25-02-2010 / 23:34:28 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
266 |
||
8213 | 267 |
cr
    "write a cr character (as a one-character string, via #nextPutAll:)"

    |lineEnd|

    lineEnd := Character cr asString.
    self nextPutAll:lineEnd
!
270 |
||
11992 | 271 |
emphasis:anObject
    "pass the emphasis on to the underlying stream"

    stream emphasis:anObject

    "Created: / 15-06-2005 / 11:16:33 / janfrog"
!
|
277 |
||
15477 | 278 |
flush
    "flush the underlying stream"

    stream flush
!
|
281 |
||
20363 | 282 |
isEmpty
    "return true, if the underlying stream is empty"

    ^ stream isEmpty
!
|
285 |
||
11946 | 286 |
isOpen
    "return true, if there is an underlying stream and it is open"

    stream isNil ifTrue:[
        ^ false
    ].
    ^ stream isOpen
!
289 |
||
11766 | 290 |
next
    "return the next character, which the encoder reads (and decodes)
     from the underlying stream"

    ^ encoder readNextCharacterFrom:stream

    "Created: / 14-06-2005 / 17:01:39 / janfrog"
!
|
11766 | 296 |
|
11992 | 297 |
next:charactersToRead
    "let the encoder read charactersToRead characters from the
     underlying stream and return what it delivers"

    ^ encoder readNext:charactersToRead charactersFrom:stream

    "Created: / 16-06-2005 / 11:43:43 / masca"
!
303 |
||
21472 | 304 |
nextPut:aCharacter
    "let the encoder write the encoded aCharacter onto the underlying stream"

    encoder encodeCharacter:aCharacter on:stream

    "Modified: / 16-02-2017 / 16:22:23 / stefan"
!
309 |
||
310 |
nextPutAll:aCollection
    "let the encoder write the encoded aCollection onto the underlying stream"

    encoder encodeString:aCollection on:stream
!
313 |
||
17331 | 314 |
nextPutAll:aCollection startingAt:start to:stop
    "let the encoder write the encoded elements start..stop of aCollection
     onto the underlying stream (a copy of that range is made first)"

    |slice|

    slice := aCollection copyFrom:start to:stop.
    encoder encodeString:slice on:stream
!
317 |
||
8444 | 318 |
peek
    "return what the underlying stream answers to #peek.
     NOTE(review): in contrast to #next, this does not go through the encoder;
     presumably the answer is an undecoded element for multi-byte encodings - to be confirmed"

    ^ stream peek

    "Created: / 20-06-2005 / 10:13:03 / masca"
    "Modified: / 20-06-2005 / 13:06:14 / masca"
!
325 |
||
8175 | 326 |
peekFor:aCharacter
    "return what the underlying stream answers to #peekFor:.
     NOTE(review): like #peek, this does not go through the encoder"

    ^ stream peekFor:aCharacter
!
|
329 |
||
8165 | 330 |
position
    "return the position of the underlying stream"

    ^ stream position
!
|
333 |
||
8197 | 334 |
position0Based
    <resource: #obsolete>
    "to be obsoleted - use position.
     Returns the position of the underlying stream, unchanged"

    ^ stream position
!
340 |
||
341 |
position0Based:newPosition
    <resource: #obsolete>
    "to be obsoleted - use position:
     Sets the position of the underlying stream to newPosition, unchanged"

    stream position:newPosition
!
347 |
||
8175 | 348 |
position1Based
    <resource: #obsolete>
    "to be obsoleted - use position.
     Returns the position of the underlying stream plus one"

    ^ (stream position) + 1
!
354 |
||
8147 | 355 |
position1Based:newPosition
    <resource: #obsolete>
    "to be obsoleted - use position:
     Sets the position of the underlying stream to newPosition minus one"

    stream position:(newPosition - 1)
!
361 |
||
8197 | 362 |
position:newPosition
    "set the position of the underlying stream"

    stream position:newPosition
!
|
365 |
||
13400 | 366 |
reset
    "reset the underlying stream"

    stream reset

    "Created: / 25-02-2010 / 23:37:14 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
371 |
||
8213 | 372 |
setToEnd
    "position the underlying stream to its end"

    stream setToEnd
!
|
375 |
||
14333 | 376 |
size
    "return the size of the underlying stream.
     Not always correct, but probably better than 0.
     Better use #isEmpty."

    "/ is that better?
    "/ self error:'size of input is unknown (due to decoding)'

    ^ stream size

    "Created: / 31-08-2012 / 16:52:40 / cg"
!
|
386 |
||
11992 | 387 |
skip:anInteger
    "skip anInteger elements of the underlying stream and
     return what the underlying stream answers"

    "/ Should skip on character basis, not on bytes. This works for XML reader
    ^ stream skip:anInteger

    "Created: / 20-06-2005 / 13:06:06 / masca"
!
|
394 |
||
8175 | 395 |
skipSeparators
    "let the underlying stream skip separators and return its answer"

    ^ stream skipSeparators
!
398 |
||
399 |
sync
    "forward the sync request to the underlying stream"

    stream sync
!
|
402 |
||
403 |
syncData
    "forward the syncData request to the underlying stream"

    stream syncData
! !
406 |
||
15049 | 407 |
!EncodedStream methodsFor:'testing'! |
408 |
||
409 |
isEncodedStream
    "always true here"

    ^ true

    "Created: / 04-02-2014 / 20:27:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
414 |
||
16258
1582c656cdb0
Added EncodedStream>>isPositionable
Jan Vrany <jan.vrany@fit.cvut.cz>
parents:
15963
diff
changeset
|
415 |
isPositionable
    "return true, if the underlying stream is positionable"

    ^ stream isPositionable

    "Created: / 14-03-2014 / 16:18:57 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
1582c656cdb0
Added EncodedStream>>isPositionable
Jan Vrany <jan.vrany@fit.cvut.cz>
parents:
15963
diff
changeset
|
420 |
|
15592 | 421 |
isReadable
    "return true, if the underlying stream is readable"

    ^ stream isReadable
!
|
424 |
||
425 |
isWritable
    "return true, if the underlying stream is writable"

    ^ stream isWritable
! !
428 |
||
8712
bcef3a0c0a1d
skipEncodingCHunk now in EncodedStream
Claus Gittinger <cg@exept.de>
parents:
8459
diff
changeset
|
429 |
!EncodedStream methodsFor:'utilities'! |
bcef3a0c0a1d
skipEncodingCHunk now in EncodedStream
Claus Gittinger <cg@exept.de>
parents:
8459
diff
changeset
|
430 |
|
bcef3a0c0a1d
skipEncodingCHunk now in EncodedStream
Claus Gittinger <cg@exept.de>
parents:
8459
diff
changeset
|
431 |
skipEncodingChunk
    "read the next chunk and look at its first scanner token:
     if the scanner finds anything but #EOF in it, position back to where we
     started, so that the chunk will be processed as usual.
     Otherwise (only a comment or encoding-directive), the chunk remains skipped.
     Does nothing, if the underlying stream is not positionable."

    |startPosition firstChunk firstToken|

    stream isPositionable ifFalse:[
        ^ self
    ].

    startPosition := self position.
    firstChunk := self nextChunk.
    [
        |scanner|

        "/ We could parse here, but this is overkill, since we are only interested in the fact,
        "/ that there is anything else than a comment in the chunk.
"/        result := (Parser for:chunk)
"/            ignoreErrors:true;
"/            ignoreWarnings:true;
"/            parseMethodBody.

        scanner := Scanner for:firstChunk.
        scanner ignoreErrors:true.
        scanner ignoreWarnings:true.
        firstToken := scanner nextToken.

        firstToken ~= #EOF ifTrue:[
            "/ a valid chunk (i.e. not a comment or encoding-directive):
            "/ position back, so it will be processed as usual.
            self position:startPosition
        ].
    ] on:Parser parseWarningSignal do:[:ex|
        "really ignore any error.
         Even setting ignoreError will output diagnostics here
         during standalone startup when debugging"
        ex proceedWith:#ignore.
    ].

    "Modified: / 29-07-2011 / 17:42:11 / cg"
    "Modified (format): / 16-02-2017 / 16:01:09 / stefan"
! !
bcef3a0c0a1d
skipEncodingCHunk now in EncodedStream
Claus Gittinger <cg@exept.de>
parents:
8459
diff
changeset
|
468 |
|
8047 | 469 |
!EncodedStream class methodsFor:'documentation'! |
470 |
||
14086 | 471 |
version
    ^ '$Header$'
!
474 |
||
13400 | 475 |
version_CVS
    ^ '$Header$'
!
478 |
||
13400 | 479 |
version_SVN
    ^ '$ Id: EncodedStream.st 10643 2011-06-08 21:53:07Z vranyj1 $'
! !
15049 | 482 |