EncodedStream.st
author Claus Gittinger <cg@exept.de>
Tue, 09 Jul 2019 20:55:17 +0200
changeset 24417 03b083548da2
parent 24030 4f1f0d4e1ce4
child 24641 c8b9d72e1a36
permissions -rw-r--r--
#REFACTORING by exept class: Smalltalk class changed: #recursiveInstallAutoloadedClassesFrom:rememberIn:maxLevels:noAutoload:packageTop:showSplashInLevels: Transcript showCR:(... bindWith:...) -> Transcript showCR:... with:...

"
 COPYRIGHT (c) 2004 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic' }"

"{ NameSpace: Smalltalk }"

"EncodedStream wraps an underlying stream together with an encoder.
 Instance variables, as used by the methods of this class:
   encoder   - asked to decode elements on read and to encode them on write
   stream    - the wrapped underlying stream
   peekChar  - single lookahead element buffered by #peek; nil if none is pending"
PeekableStream subclass:#EncodedStream
	instanceVariableNames:'encoder stream peekChar'
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Encodings'
!

!EncodedStream class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2004 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    a stream which transparently decodes from an external encoding,
    looking for '{ Encoding: xxx' near the beginning of the file.
    Writing is supported as well: written elements are passed through
    the encoder onto the underlying stream.

    especially targeted towards reading ST/X source files.
"
! !

!EncodedStream class methodsFor:'instance creation'!

stream:streamArg encoder:encoder
    "answer a new instance which wraps streamArg and uses encoder
     to decode what is read and to encode what is written.
     The cascade is closed with #yourself, so that the new instance is
     answered regardless of what the #encoder: setter returns."

    ^ (self basicNew)
        stream:streamArg;
        encoder:encoder;
        yourself

    "
     |s|
     s := EncodedStream stream:Transcript encoder:(CharacterEncoder encoderToEncodeFrom:#utf8 into:#unicode).
     s nextPutAll:('öäü' utf8Encoded)

     |s|
     s := EncodedStream stream:('öäü' readStream) encoder:(CharacterEncoder encoderToEncodeFrom:#utf8 into:#unicode).
     s next:3
    "

    "Modified (comment): / 17-01-2018 / 13:36:28 / stefan"
!

stream:streamArg encoding:encodingSymbol
    "answer a new instance wrapping streamArg; its encoder is the one
     which CharacterEncoder provides for encodingSymbol"

    |encoderForSymbol|

    encoderForSymbol := CharacterEncoder encoderFor:encodingSymbol.
    ^ self stream:streamArg encoder:encoderForSymbol

    "
     |baseStream s|
     baseStream := '' readWriteStream.
     s := EncodedStream stream:baseStream encoding:#utf8.
     s nextPutAll:'öäü'.
     baseStream reset; contents.

     s contents
    "

    "Modified (comment): / 17-01-2018 / 13:32:10 / stefan"
! !

!EncodedStream class methodsFor:'Compatibility-VW5.4'!

on: aStream encodedBy: aStreamEncoder
    "VisualWorks-compatible instance creation:
     answer what the instance-side #on:encodedBy: returns for a fresh instance"

    |freshInstance|

    freshInstance := self basicNew.
    ^ freshInstance on:aStream encodedBy:aStreamEncoder
! !

!EncodedStream class methodsFor:'utilities'!

decodedStreamFor:aStream
    "answer an EncodedStream on aStream, which decodes on the fly.
     If the input stream of aStream is positionable, its encoding is guessed
     by CharacterEncoder (#guessEncodingOfStream:) and a matching encoder is installed.
     Otherwise, the null encoder is installed for now; #nextChunk switches to
     a real encoder later, when it encounters an encoding pragma.
     Used mostly to read UTF8 files (source code)"

    |guessedEncoding|

    aStream inputStream isPositionable ifFalse:[
        "/ no way to look ahead: start without any decoding
        ^ self stream:aStream encoder:CharacterEncoder nullEncoderInstance
    ].

    guessedEncoding := CharacterEncoder guessEncodingOfStream:aStream inputStream.
    "/ JV@2012-03-27: the position is deliberately NOT changed here
    "/ (no skipEncodingChunk), because the caller might be interested in all data.
    ^ self stream:aStream encoding:guessedEncoding

    "Modified: / 23-08-2013 / 17:30:58 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

encoderFor:anEncodingSymbol
    "answer an encoder for anEncodingSymbol, or nil if no explicit encoder
     is to be used (no symbol given, or one of #'iso8859-1' / #'ascii')"

    anEncodingSymbol isNil ifTrue:[^ nil].
    (anEncodingSymbol = #'iso8859-1') ifTrue:[^ nil].
    (anEncodingSymbol = #'ascii') ifTrue:[^ nil].

    ^ CharacterEncoder encoderFor:anEncodingSymbol
! !

!EncodedStream methodsFor:'Compatibility-VW5.4'!

on:aStream encodedBy:aStreamEncoder
    "VisualWorks compatibility: set up the receiver to work on aStream,
     using aStreamEncoder as encoder. Answers the receiver.
     The commented lines below are steps which are not performed here."

"/    skipSize := PositionRecord new.
"/    aStreamEncoder skipRecord: skipSize.
"/    binary := false.
"/    lineEndConvention == nil 
"/            ifTrue: 
"/                    [aStream needsFileLineEndConversion 
"/                            ifTrue: [self lineEndConvention: IOAccessor defaultLineEndConvention]
"/                            ifFalse: [self lineEndConvention: LineEndTransparent]].
    self
        stream:aStream;
        encoder:aStreamEncoder
! !

!EncodedStream methodsFor:'accessing'!

encoder
    "answer the encoder, which is used to decode on read and to encode on write"

    ^ encoder
!

encoder:aCharacterEncoder
    "set the encoder to be used by subsequent read and write operations"

    encoder := aCharacterEncoder.
!

encoding
    "answer the name of the encoding, as reported by the encoder"

    ^ encoder nameOfEncoding

    "Created: / 13-03-2017 / 11:17:56 / stefan"
!

inputStream
    "answer whatever the underlying stream answers to #inputStream"

    ^ stream inputStream
!

lineNumber
    "answer the line number as reported by the underlying stream
     (decoding characters does not change it); nil, if there is no underlying stream"

    stream notNil ifTrue:[
        ^ stream lineNumber
    ].
    ^ nil

    "Modified: / 02-01-2018 / 20:12:18 / stefan"
    "Modified (comment): / 09-01-2018 / 17:47:20 / stefan"
!

pathName
    "answer the pathname as reported by the underlying stream;
     nil, if there is no underlying stream"

    stream notNil ifTrue:[
        ^ stream pathName
    ].
    ^ nil
!

readStream
    "answer a stream to read from - that is the receiver itself"

    ^ self

    "Modified (comment): / 16-02-2017 / 15:59:52 / stefan"
!

stream
    "answer the wrapped underlying stream"

    ^ stream
!

stream:something
    "set the wrapped underlying stream.
     A buffered lookahead element (peekChar) is left untouched by this setter."

    stream := something.
! !

!EncodedStream methodsFor:'chunk input/output'!

nextChunk
    "read the next smalltalk chunk via the inherited implementation.
     Side effect: as long as the null encoder is in use on a non-positionable
     stream, each chunk is checked for an encoding pragma. If one is found and
     an encoder is available for it, that encoder is installed and the
     decoded chunk is answered instead of the raw one.
     (for positionable streams, the encoder is set up in the class-side #decodedStreamFor:)"

    |rawChunk detectedEncoder|

    rawChunk := super nextChunk.

    "/ an encoder was already chosen - nothing to check
    encoder isNullEncoder ifFalse:[^ rawChunk].
    stream isPositionable ifTrue:[^ rawChunk].

    "/ lazy setup of the encoder is only triggered by an encoding pragma
    rawChunk isNil ifTrue:[^ rawChunk].
    (rawChunk includesString:'{ Encoding:') ifFalse:[^ rawChunk].

    detectedEncoder := self class encoderFor:(CharacterEncoder guessEncodingOfBuffer:rawChunk).
    detectedEncoder isNil ifTrue:[^ rawChunk].

    encoder := detectedEncoder.
    ^ encoder decodeString:rawChunk

    "Modified (format): / 04-01-2018 / 00:33:57 / stefan"
! !

!EncodedStream methodsFor:'queries'!

contentsSpecies
    "answer String as the species of the receiver's contents.
     NOTE(review): presumably consulted by inherited collection-answering readers - confirm"

    ^ String

    "Created: / 14-06-2005 / 17:11:01 / janfrog"
    "Modified: / 10-01-2018 / 11:43:57 / stefan"
!

isEncoderFor:encodingString
    "answer what the current encoder answers to #isEncoderFor: for encodingString"

    ^ encoder isEncoderFor:encodingString

    "Created: / 13-03-2017 / 11:20:20 / stefan"
! !

!EncodedStream methodsFor:'stream protocol'!

atEnd
    "answer true, if nothing more can be read:
     no lookahead element is buffered and the underlying stream is at its end"

    peekChar notNil ifTrue:[
        "/ a buffered element is still to be delivered
        ^ false
    ].
    ^ stream atEnd

    "Modified: / 02-01-2018 / 21:47:17 / stefan"
!

close
    "drop a buffered lookahead element, then close the underlying stream"

    peekChar := nil.
    stream close

    "Modified: / 09-01-2018 / 18:33:01 / stefan"
!

collection
    "return the underlying container; nil, if there is none (eg. external streams).
     Here nil is always returned, because the underlying collection (if any)
     is useless to the outside world"
    
    ^ nil

    "Created: / 22-02-2017 / 11:15:00 / cg"
!

contents
    "read up to the end and answer all remaining (decoded) elements,
     collected via String streamContents:"

    |element|

    ^ String streamContents:[:collected |
        [self atEnd] whileFalse:[
            element := self next.
            "/ the decoder may answer nil from #next, although the underlying
            "/ stream was not at its end before (probably a bug) - such nils are skipped
            element isNil ifFalse:[
                collected nextPut:element
            ]
        ]
    ]

    "Created: / 25-02-2010 / 23:34:28 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified (format): / 02-01-2018 / 19:35:10 / stefan"
!

emphasis:anObject
    "forward the emphasis to the underlying stream"

    stream emphasis:anObject

    "Created: / 15-06-2005 / 11:16:33 / janfrog"
!

flush
    "drop a buffered lookahead element, then flush the underlying stream.
     NOTE(review): an element fetched by an earlier #peek is lost here - confirm that this is intended"

    peekChar := nil.
    stream flush

    "Modified: / 02-01-2018 / 19:17:40 / stefan"
!

isEmpty
    "answer what the underlying stream answers to #isEmptyOrNil.
     A buffered lookahead element is not taken into account here."

    ^ stream isEmptyOrNil.

    "Modified: / 02-01-2018 / 19:59:09 / stefan"
!

next
    "answer the next decoded element.
     A lookahead element buffered by #peek is delivered first (and the buffer cleared);
     otherwise, the encoder reads the next element from the underlying stream"

    |buffered|

    peekChar isNil ifTrue:[
        ^ encoder readNextCharacterFrom:stream
    ].

    buffered := peekChar.
    peekChar := nil.
    ^ buffered

    "Created: / 14-06-2005 / 17:01:39 / janfrog"
    "Modified: / 02-01-2018 / 19:19:29 / stefan"
!

next:nCharactersToRead
    "answer the next nCharactersToRead decoded elements.
     A pending lookahead element counts as the first one.
     NOTE(review): for a count of 1, the bare element from #next is answered
     instead of a one-element collection - confirm that callers expect this"

    |tail result|

    nCharactersToRead == 1 ifTrue:[^ self next].
    nCharactersToRead == 0 ifTrue:[^ ''].

    peekChar notNil ifTrue:[
        "/ the buffered element was already consumed from the underlying stream,
        "/ so one element less is to be read, and the buffered one goes in front
        tail := encoder readNext:nCharactersToRead-1 charactersFrom:stream.
        result := tail copyWithFirst:peekChar.
        peekChar := nil.
        ^ result
    ].

    ^ encoder readNext:nCharactersToRead charactersFrom:stream

    "Created: / 16-06-2005 / 11:43:43 / masca"
    "Modified: / 03-01-2018 / 22:22:51 / stefan"
!

nextPut:aCharacter
    "let the encoder write the encoded form of aCharacter onto the underlying stream"

    encoder encodeCharacter:aCharacter on:stream.

    "Modified: / 16-02-2017 / 16:22:23 / stefan"
!

nextPutAll:aCollection
    "let the encoder write the encoded form of aCollection onto the underlying stream"

    encoder encodeString:aCollection on:stream 
!

nextPutAll:aCollection startingAt:start to:stop
    "write the elements of aCollection from index start to index stop:
     that slice is copied and then passed through the encoder
     onto the underlying stream"

    |slice|

    slice := aCollection copyFrom:start to:stop.
    encoder encodeString:slice on:stream.

    "Modified (comment): / 01-04-2019 / 17:13:01 / Claus Gittinger"
!

peek
    "answer the next element without consuming it.
     The element is fetched via #next once and kept in peekChar,
     from where the following #next delivers it"

    peekChar notNil ifTrue:[
        ^ peekChar
    ].
    ^ peekChar := self next

    "Created: / 20-06-2005 / 10:13:03 / masca"
    "Modified: / 20-06-2005 / 13:06:14 / masca"
    "Modified (format): / 02-01-2018 / 19:21:00 / stefan"
!

position
    "answer the position in the underlying stream, for later use with #position:.
     Only use #position / #position: to restore a previous position;
     computing relative positions does not work."

    peekChar isNil ifTrue:[
        ^ stream position
    ].

    "/ #position: drops peekChar - therefore answer the position in front of it,
    "/ i.e. step back by the size which the encoder reports for that element
    ^ stream position - (encoder characterSize:peekChar)

    "Modified: / 15-01-2018 / 22:42:01 / stefan"
!

position0Based
    <resource: #obsolete>
    "to be obsoleted - use position.
     Delegates to #position, so that a pending lookahead element (peekChar)
     is compensated for in the same way; previously, the raw position of the
     underlying stream (i.e. behind the peeked element) was answered."

    ^ self position
!

position0Based:newPosition
    <resource: #obsolete>
    "to be obsoleted - use position:.
     Delegates to #position:, which also drops a pending lookahead element;
     previously a stale peekChar survived the repositioning and was
     delivered by the following #next."

    self position:newPosition
!

position1Based
    <resource: #obsolete>
    "to be obsoleted - use position.
     Answers the 1-based position of the underlying stream; as in #position,
     a pending lookahead element (peekChar) is compensated for, so that the
     answered position is the one in front of that element."

    |pos|

    pos := stream position1Based.
    peekChar notNil ifTrue:[
        "/ the peeked element was already consumed from the underlying stream
        pos := pos - (encoder characterSize:peekChar).
    ].
    ^ pos

    "Modified: / 02-01-2018 / 20:00:57 / stefan"
!

position1Based:newPosition
    <resource: #obsolete>
    "to be obsoleted - use position:.
     As in #position:, a pending lookahead element is dropped first;
     previously a stale peekChar survived the repositioning and was
     delivered by the following #next."

    peekChar := nil.
    stream position1Based:newPosition

    "Modified: / 02-01-2018 / 20:00:46 / stefan"
!

position:newPosition
    "only use #position/#position: to restore a previous position.
     Computing relative positions does not work!!
     Use #skip: to advance forward.
     A pending lookahead element is dropped, before the underlying stream is repositioned."

    peekChar := nil.
    stream position:newPosition

    "Modified (comment): / 09-01-2018 / 17:53:04 / stefan"
!

reset
    "drop a pending lookahead element, then reset the underlying stream"

    peekChar := nil.
    stream reset

    "Created: / 25-02-2010 / 23:37:14 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    "Modified: / 02-01-2018 / 19:34:20 / stefan"
!

setToEnd
    "drop a pending lookahead element, then position the underlying stream to its end"

    peekChar := nil.
    stream setToEnd

    "Modified (comment): / 09-01-2018 / 17:50:27 / stefan"
!

size
    "answer the size as reported by the underlying stream.
     Not always correct (the number of decoded elements is unknown due to decoding),
     but probably better than 0.
     Better use #isEmpty."

    "/ is that better?
    "/ self error:'size of input is unknown (due to decoding)'
    ^ stream size

    "Created: / 31-08-2012 / 16:52:40 / cg"
!

skip:nrToSkip
    "skip forward by reading (and discarding) nrToSkip elements via #next.
     Skipping backwards is not possible: for a negative count
     a proceedable error is raised, and nothing is skipped if proceeded"

    nrToSkip < 0 ifFalse:[
        nrToSkip timesRepeat:[self next].
        ^ self
    ].
    self proceedableError:'cannot position backwards'
!

sync
    "forward #sync to the underlying stream"

    stream sync
!

syncData
    "forward #syncData to the underlying stream"

    stream syncData
! !

!EncodedStream methodsFor:'testing'!

isEncodedStream
    "answer true - the receiver is an encoded stream"

    ^ true

    "Created: / 04-02-2014 / 20:27:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

isOpen
    "for compatibility with externalStream:
     answer true, if there is an underlying stream and that one is open"

    stream isNil ifTrue:[
        ^ false
    ].
    ^ stream isOpen

    "Modified (comment): / 09-01-2018 / 18:34:47 / stefan"
!

isPositionable
    "answer true, if the underlying stream is positionable"

    ^ stream isPositionable

    "Created: / 14-03-2014 / 16:18:57 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!

isReadable
    "answer true, if the underlying stream is readable"

    ^ stream isReadable
!

isWritable
    "answer true, if the underlying stream is writable"

    ^ stream isWritable
! !

!EncodedStream methodsFor:'utilities'!

skipEncodingChunk
    "read the next chunk and consume it, if the scanner finds no token in it
     (i.e. it consists of comments only, such as an encoding directive);
     otherwise position back to where the chunk started.
     Does nothing, if the underlying stream is not positionable."

    |pos chunk token|

    stream isPositionable ifFalse:[
        ^ self
    ].

    "remember where the chunk starts, to be able to undo the read"
    pos := self position.
    chunk := self nextChunk.
    [
        "/ if this is a valid chunk (i.e. not a comment or encoding-directive),
        "/ then position back, so it will be processed as usual.
"/ We could parse here, but this is overkill, since we are only interested in the fact,
"/ that there is anything else than a comment in the chunk.
"/        result := (Parser for:chunk)
"/            ignoreErrors:true;
"/            ignoreWarnings:true;
"/            parseMethodBody.

        "only the first token is of interest: #EOF means, that the chunk contains no code"
        token := (Scanner for:chunk)
                     ignoreErrors:true;
                     ignoreWarnings:true;
                     nextToken.

        token ~= #EOF ifTrue:[
            self position:pos
        ].
    ] on:Parser parseWarningSignal do:[:ex|
        "really ignore any error.
         Even setting ignoreError will output diagnostics here
         during standalone startup when debugging.
         NOTE(review): only Parser parseWarningSignal is handled here - confirm that this covers all intended cases"
        ex proceedWith:#ignore.
    ].

    "Modified: / 29-07-2011 / 17:42:11 / cg"
    "Modified (format): / 16-02-2017 / 16:01:09 / stefan"
! !

!EncodedStream class methodsFor:'documentation'!

version
    "answer the version string; it holds the Header keyword placeholder
     (presumably expanded by the source code management system)"

    ^ '$Header$'
!

version_CVS
    "answer the CVS version string; it holds the Header keyword placeholder
     (presumably expanded by CVS)"

    ^ '$Header$'
!

version_SVN
    "answer the SVN version string, as recorded at the time of the last SVN checkin"

    ^ '$ Id: EncodedStream.st 10643 2011-06-08 21:53:07Z vranyj1  $'
! !