CompressionStream.st
author Stefan Vogel <sv@exept.de>
Tue, 06 May 2003 16:00:06 +0200
changeset 1193 7688148ec90c
parent 1083 5a0ae155f400
child 1195 9d887847d1b6
permissions -rw-r--r--
Code formatting & clean up

"{ Package: 'ca:Compress' }"

"Abstract stream which compresses or uncompresses the data of another stream.
 The low level z* operations are left to subclasses (subclassResponsibility).

 Instance variables, as used by the methods of this class:
    onStream      the underlying stream read from / written to
    hitEOF        false while open and not at the end; true after EOF or close
    binary        true: elements are bytes; false: elements are characters
    position      index into the current buffer (outputBytes when reading,
                  inputBytes when writing)
    readLimit     number of uncompressed bytes available in outputBytes
    mode          #readonly or #writeonly; nil after close
    inputBytes    ExternalBytes buffer (8192 bytes) handed to the compressor
    outputBytes   ExternalBytes buffer (8192 bytes) filled by the compressor
    zstream       low level handle; nil when closed - presumably set by
                  the subclass implementation of zopen (not visible here)

 Class variables:
    BlockSize     set to 6 in #initialize; not referenced in this class"
Stream subclass:#CompressionStream
	instanceVariableNames:'onStream hitEOF binary position readLimit mode inputBytes
		outputBytes zstream'
	classVariableNames:'BlockSize'
	poolDictionaries:''
	category:'System-Compress'
!


!CompressionStream class methodsFor:'initialization'!

initialize
    "class initialization; sets the class variable BlockSize.
     NOTE(review): BlockSize is not referenced by any other method of
     this class - presumably it is used by subclasses; confirm"

    BlockSize := 6.
! !

!CompressionStream class methodsFor:'instance creation'!

openWithMode:modeSymbol on:aStream
    "open on aStream with mode modeSymbol (#readonly #writeonly).
     Class side variant - has to be implemented by subclasses.
     Note that #readOpenOn: and #writeOpenOn: do not go through this
     method; they send #openWithMode:on: to a new instance"

    ^ self subclassResponsibility
!

readOpenOn:aStream
    "create a new instance (via basicNew) and open it in #readonly mode
     on aStream, which delivers the compressed data.
     Answers whatever the instance side #openWithMode:on: returns"

    |reader|

    reader := self basicNew.
    ^ reader openWithMode:#readonly on:aStream
!

writeOpenOn:aStream
    "create a new instance (via basicNew) and open it in #writeonly mode
     on aStream, which receives the compressed data.
     Answers whatever the instance side #openWithMode:on: returns"

    |writer|

    writer := self basicNew.
    ^ writer openWithMode:#writeonly on:aStream
! !

!CompressionStream class methodsFor:'test'!

doTestNextN
"
    Ad-hoc check of #next: against #contents.
    Uncompresses a gzip file twice with ZipStream: once in a single step
    via #contents, once in pieces of 117 elements via #next:.
    For each run the elapsed milliseconds and the size of the result are
    shown on the Transcript; finally both results are compared.
    The file name is hard-coded; an error is raised if it is not readable.

    CompressionStream doTestNextN
"
   |stream time file zipCont nxtCont|

   file := '/phys/exept/tmp/yyy/201--T22--D.20000415.SAGSA.DE0220523.gz' asFilename.
   file isReadable ifFalse:[^ self error:'not existant'].

   "run 1: read everything with #contents"
   time := Time millisecondsToRun:[ |zipStream|
        zipStream := stream := zipCont := nil.
        [
            stream    := file readStream.
            zipStream := ZipStream readOpenOn:stream.
            zipCont   := zipStream contents.
        ] valueNowOrOnUnwindDo:[
            "close both streams, whether the block above completed or was unwound"
            zipStream notNil ifTrue:[zipStream close].
            stream    notNil ifTrue:[stream close].
        ].
   ].
   Transcript showCR:('STX   Time : %1  Size: %2' bindWith:time with:(zipCont size)).

   "run 2: read the same file in chunks with #next:"
   time := Time millisecondsToRun:[ |zipStream wstream|
        zipStream := stream := nxtCont := nil.
        [
            stream    := file readStream.
            wstream   := '' writeStream.
            zipStream := ZipStream readOpenOn:stream.

            [zipStream atEnd] whileFalse:[
                wstream nextPutAll:(zipStream next:117)
            ].
            nxtCont := wstream contents.
        ] valueNowOrOnUnwindDo:[
            zipStream notNil ifTrue:[zipStream close].
            stream    notNil ifTrue:[stream close].
        ].
   ].
   Transcript showCR:('NEXT  Time : %1  Size: %2' bindWith:time with:(nxtCont size)).

   "both ways of reading must deliver the same data"
   nxtCont = zipCont ifTrue:[ Transcript showCR:'OK' ]
                    ifFalse:[ self error:'contents differs' ].
!

doTestSkipN
"
    Ad-hoc check of #skip: against repeated #next.
    Uncompresses a gzip file twice with ZipStream, each time discarding
    the first 'skip' elements and collecting the rest in pieces of 117
    elements: once by sending #next 'skip' times, once by a single #skip:.
    For each run the elapsed milliseconds and the size of the remaining
    data are shown on the Transcript; finally both results are compared.
    The file name is hard-coded; an error is raised if it is not readable.

    CompressionStream doTestSkipN
"
   |stream time file skpCont nxtCont skip|

   file := '/phys/exept/tmp/yyy/201--T22--D.20000415.SAGSA.DE0220523.gz' asFilename.
   file isReadable ifFalse:[^ self error:'not existant'].

   skip := 6885379.

   "run 1: discard the leading elements one by one with #next"
   time := Time millisecondsToRun:[ |zipStream wstream|
        zipStream := stream := nxtCont := nil.
        [
            stream    := file readStream.
            wstream   := '' writeStream.
            zipStream := ZipStream readOpenOn:stream.
            skip timesRepeat:[ zipStream next ].

            [zipStream atEnd] whileFalse:[
                wstream nextPutAll:(zipStream next:117)
            ].
            nxtCont := wstream contents.
        ] valueNowOrOnUnwindDo:[
            "close both streams, whether the block above completed or was unwound"
            zipStream notNil ifTrue:[zipStream close].
            stream    notNil ifTrue:[stream close].
        ].
   ].

   "the label names the way of skipping which was measured"
   Transcript showCR:('NEXT  Time : %1  Size: %2' bindWith:time with:(nxtCont size)).

   "run 2: discard the leading elements with a single #skip:"
   time := Time millisecondsToRun:[ |zipStream wstream|
        zipStream := stream := skpCont := nil.
        [
            stream    := file readStream.
            wstream   := '' writeStream.
            zipStream := ZipStream readOpenOn:stream.
            zipStream skip:skip.
            [zipStream atEnd] whileFalse:[
                wstream nextPutAll:(zipStream next:117)
            ].
            skpCont := wstream contents.
        ] valueNowOrOnUnwindDo:[
            zipStream notNil ifTrue:[zipStream close].
            stream    notNil ifTrue:[stream close].
        ].
   ].
   Transcript showCR:('SKIP  Time : %1  Size: %2' bindWith:time with:(skpCont size)).

   "both ways of skipping must leave the same remaining data"
   nxtCont = skpCont ifTrue:[ Transcript showCR:'OK' ]
                    ifFalse:[ self error:'contents differs' ].

!

doTestUnixAgainstClass
"
    Ad-hoc check of ZipStream against the external gunzip command.
    Uncompresses a gzip file once with ZipStream (#contents) and once by
    reading the output of 'gunzip < file' through a PipeStream.
    For each run the elapsed milliseconds and the size of the result are
    shown on the Transcript; finally both results are compared.
    The file name is hard-coded; an error is raised if it is not readable.

    CompressionStream doTestUnixAgainstClass
"
   |stream time file zipCont cmdCont|

   file := '/phys/exept/tmp/yyy/201--T22--D.20000415.SAGSA.DE0220523.gz' asFilename.
   file isReadable ifFalse:[^ self error:'not existant'].

   "run 1: uncompress with ZipStream"
   time := Time millisecondsToRun:[ |zipStream|
        zipStream := stream := zipCont := nil.
        [
            stream    := file readStream.
            zipStream := ZipStream readOpenOn:stream.
            zipCont   := zipStream contents.
        ] valueNowOrOnUnwindDo:[
            "close both streams, whether the block above completed or was unwound"
            zipStream notNil ifTrue:[zipStream close].
            stream    notNil ifTrue:[stream close].
        ].
   ].
   Transcript showCR:('STX   Time : %1  Size: %2' bindWith:time with:(zipCont size)).

   "run 2: uncompress with the external command, reading its output from a pipe"
   time := Time millisecondsToRun:[ |command|
        cmdCont := stream := nil.
        [
            command := 'gunzip < ' , file pathName.
            stream  := PipeStream readingFrom:command.
            cmdCont := stream contentsOfEntireFile.

        ] valueNowOrOnUnwindDo:[
            stream notNil ifTrue:[stream close].
        ]
   ].
   Transcript showCR:('UNIX  Time : %1  Size: %2' bindWith:time with:(cmdCont size)).

   "both ways of uncompressing must deliver the same data"
   cmdCont = zipCont ifTrue:[ Transcript showCR:'OK' ]
                    ifFalse:[ self error:'contents differs' ].
!

test
"
    Round trip check: compress the contents of the file smalltalk.rc
    element by element into a byte collection, uncompress that again and
    compare the result with the original contents.
    The sizes of the uncompressed and of the compressed data are shown on
    the Transcript, followed by OK if both contents are equal; otherwise
    a halt is raised.

    CompressionStream test
"
   |plainText packed roundTrip source sink zipper|

   plainText := 'smalltalk.rc' asFilename contentsOfEntireFile.
   source    := plainText readStream.

   "phase 1: compress into a ByteArray"
   [ |element|
        sink   := #[] writeStream.
        zipper := self writeOpenOn:sink.

        [source atEnd] whileFalse:[
            element := source next.
            element notNil ifTrue:[ zipper nextPut:element ]
        ]
   ] valueNowOrOnUnwindDo:[
        zipper notNil ifTrue:[ zipper close ]
   ].
   packed := sink contents.

   "phase 2: uncompress again; #next answers nil at the end"
   [ |element|
        zipper := self readOpenOn:(packed readStream).
        sink   := '' writeStream.

        [ element := zipper next. element isNil ] whileFalse:[
            sink nextPut:element
        ]
   ] valueNowOrOnUnwindDo:[
        zipper notNil ifTrue:[ zipper close ].
        roundTrip := sink contents.

        Transcript showCR:(roundTrip size).
        Transcript showCR:(packed size).
   ].

   plainText = roundTrip
        ifTrue:[ Transcript showCR:'OK' ]
        ifFalse:[ self halt:'contents differs' ].
!

testFile
"
    Compress the contents of the file smalltalk.rc into the new file
    YYY.gz, feeding the compressor in pieces of at most 512 elements.
    Nothing is verified here; instead, a shell command which uncompresses
    and compares the result is shown on the Transcript and printed, to be
    executed manually.
    Returns silently if the output file cannot be created.

    CompressionStream testFile
"
   |plainText source gzFile zipper checkCommand|

   plainText := 'smalltalk.rc' asFilename contentsOfEntireFile.
   source    := plainText readStream.

   gzFile := FileStream newFileNamed:'YYY.gz'.
   gzFile isNil ifTrue:[ ^ self ].

   [
        zipper := self writeOpenOn:gzFile.

        [source atEnd] whileFalse:[
            (source nextAvailable:512) do:[:each |
                zipper nextPut:each
            ]
        ].
   ] valueNowOrOnUnwindDo:[
        "the compressor is closed first, then the file"
        zipper notNil ifTrue:[ zipper close ].
        gzFile close.
   ].

   checkCommand := 'gzip -dc YYY.gz > YYY; diff YYY smalltalk.rc'.
   Transcript showCR:checkCommand.
   checkCommand printCR.
! !

!CompressionStream methodsFor:'accessing'!

binary
    "switch to binary mode, in which elements are bytes
     (see #next and #contentsSpecies).
     The default after opening is text mode"

    binary := true.
!

text
    "switch to text mode, in which elements are characters
     (see #next and #contentsSpecies).
     This is the default after opening"

    binary := false.
! !

!CompressionStream methodsFor:'error handling'!

errorNotOpen
    "report an error, that the stream has not been opened.
     Done via #zerror:, which also closes the zip-stream
     and raises the stream error signal"

    self zerror:'not open'.
!

errorReadOnly
    "report an error, that the stream is a readOnly stream.
     Done via #zerror:, which also closes the zip-stream
     and raises the stream error signal"

    self zerror:'is readonly'
!

errorWriteOnly
    "report an error, that the stream is a writeOnly stream.
     Done via #zerror:, which also closes the zip-stream
     and raises the stream error signal"

    self zerror:'is writeonly'
!

invalidArgument
    "called if a method is invoked with invalid parameters.
     Reports the error via #zerror:, which also closes the zip-stream
     and raises the stream error signal"

    self zerror:'invalid arguments'.
!

pastEnd
    "report an error, that the end of the stream has been reached.
     Sent by #canReadWithoutBlocking when the underlying stream delivers
     no more input although the uncompressor needs more.
     Done via #zerror:, which also closes the zip-stream
     and raises the stream error signal"

    self zerror:'end of stream'.
!

zerror:anError
    "close the zip-stream and raise the stream error signal with an
     error string consisting of the name of the receiver class and a text
     derived from anError:
        - if there is no zstream, the text is always 'not open'
        - a number is mapped to a fixed text (codes 1 and -1 .. -6);
          any other number is reported with its printString
        - any other object is reported by its printString"

    |text|

    zstream isNil ifTrue:[
        text := 'not open'.
    ] ifFalse:[
        anError isNumber ifFalse:[
            text := anError printString
        ] ifTrue:[
            "the fallback text; replaced below if the code is a known one"
            text := 'compressing error: ', anError printString.

            anError ==  1 ifTrue:[ text := 'stream at end' ].
            anError == -1 ifTrue:[ text := 'processing error: ', anError printString ].
            anError == -2 ifTrue:[ text := 'processing error' ].
            anError == -3 ifTrue:[ text := 'input data are corrupted' ].
            anError == -4 ifTrue:[ text := 'not enough memory' ].
            anError == -5 ifTrue:[ text := 'not enough memory in the output stream' ].
            anError == -6 ifTrue:[ text := 'version error' ].
        ].
    ].

    "release the resources before the signal is raised"
    self closeZStream.
    Stream streamErrorSignal raiseErrorString:(self class name , ': ', text).
! !

!CompressionStream methodsFor:'finalization'!

executor
    "answer the object which gets the #finalize message in place of the
     receiver: a fresh instance (basicNew) which knows nothing
     but the zstream handle"

    |shadow|

    shadow := self class basicNew.
    ^ shadow finalizeCopy:zstream
!

finalize
    "the compression-stream was garbage collected;
     close the underlying zip-stream.
     This is executed by the lightweight copy created in #executor,
     which only holds the zstream handle"

    self closeZStream.
!

finalizeCopy:aZStream
    "used for finalization to close the underlying zip-stream.
     Remembers the handle aZStream in the receiver, which is the copy
     created by #executor; all other instance variables remain nil"

    zstream := aZStream.
! !

!CompressionStream methodsFor:'low level'!

z_nextAvailableInto:aCollection startingAt:offset
    "read the next available bytes into a collection, a string or byteArray;
     returns the size read.

     Copies bytes which are already uncompressed in outputBytes; nothing
     is uncompressed here. The number of bytes copied is the minimum of
     the bytes left in the buffer (readLimit - position) and the room in
     aCollection from the 1-based index offset up to its end.
     Returns 0 if the buffer is exhausted or if there is no room.
     Reports an error via #zerror: if offset is more than one beyond the
     end of aCollection, or if aCollection is neither a byte collection
     nor a string"

    |start count avail|

    avail := readLimit - position.
    avail > 0 ifFalse:[^ 0].

    count := aCollection size - offset + 1.

    count > 0 ifFalse:[
        count < 0 ifTrue:[
            self zerror:'invalid arguments'
        ].
        ^ 0
    ].
    count    := avail min:count.
    "start is the 0-based index of the first byte to copy;
     note that position is advanced before the bytes are copied"
    start    := position.
    position := position + count.

%{  unsigned char * _dstPt;

    /* address of the first element of the destination; 0 if unsupported */
    if( __isBytes(aCollection) ) {
        _dstPt = (unsigned char *) (__ByteArrayInstPtr(aCollection)->ba_element);
    } else if (__isString(aCollection)) {
        _dstPt = (unsigned char *) (__stringVal( aCollection));
    } else
        _dstPt = (unsigned char *) 0;

    if( _dstPt )
    {
        int             _loop, _count, _offset;
        unsigned char * _srcPt;
        OBJ             _srcObj = __INST( outputBytes );

        /* offset is 1-based */
        _offset = __intVal( offset );
        _dstPt  = _dstPt + _offset - 1;

        _srcPt  = (unsigned char *) __externalBytesAddress( _srcObj );
        _srcPt += __intVal( start );
        _count  = __intVal( count );

        /* bytewise copy of count bytes */
        for( _loop = 0; _loop < _count; ++_loop )
            * _dstPt++ = * _srcPt++;

        RETURN(__MKSMALLINT(_count));
    }
%}.

    "arrive here only if aCollection is not supported by the primitive code"
    ^ self zerror:'invalid argument'
!

zclose
    "low level close of the zip stream.
     Has to be implemented by subclasses.
     Sent by #closeZStream, and only if a zstream is present"

    ^ self subclassResponsibility
!

zdeflate
    "low level - deflate
     returns false if the deflate operation is finished otherwise true.
     Has to be implemented by subclasses.
     Sent by #flush, which repeats writing the output buffer
     for as long as true is returned"

    ^ self subclassResponsibility
!

zdeflateInit
    "low level - deflateInit
     initialize the deflate mode, write header.
     Has to be implemented by subclasses.
     Sent by the instance side #openWithMode:on: for every mode
     other than #readonly"

    ^ self subclassResponsibility
!

zget_avail_out
    "low level - get the number of available out bytes.
     Has to be implemented by subclasses.
     Sent by #flush, which writes that many bytes of the output buffer
     to the underlying stream if the number is greater than zero"

    ^ self subclassResponsibility
!

zinflate
    "low level - inflate
     returns nil if uncompressing is finished, or the number of
     available bytes in the output-buffer.
     Has to be implemented by subclasses.
     Sent by #canReadWithoutBlocking, which takes the result as the new
     readLimit and treats a result of 0 as a request for more input"

    ^ self subclassResponsibility
!

zinflateInit
    "low level - inflateInit
     initialize the inflate mode, read and check header.
     Has to be implemented by subclasses.
     Sent by the instance side #openWithMode:on: for the mode #readonly"

    ^ self subclassResponsibility
!

zopen
    "low level - opens the zip stream
     create the resources.
     Has to be implemented by subclasses.
     Sent by the instance side #openWithMode:on: after the buffers have
     been allocated and before the receiver is registered for finalization"

    ^ self subclassResponsibility
!

zset_avail_in:count
    "set the 'avail_in' and compute the crc.
     Has to be implemented by subclasses.
     count is the number of valid bytes in the input buffer: when reading,
     the number of bytes fetched from the underlying stream
     (see #canReadWithoutBlocking); when writing, the number of bytes
     collected by #nextPut: (see #flush)"

    ^ self subclassResponsibility
! !

!CompressionStream methodsFor:'private'!

closeZStream
    "mark the receiver as closed (no underlying stream, no mode, at end)
     and release the low level zip-stream, if there is one.
     Can be sent more than once; the low level close is only done
     as long as a zstream handle is present"

    hitEOF   := true.
    onStream := nil.
    mode     := nil.

    zstream isNil ifTrue:[ ^ self ].

    self unregisterForFinalization.
    self zclose.
    zstream := nil.
!

contentsSpecies
    "answer the class of the collections built by #contents and #next:
     - ByteArray in binary mode, String otherwise
     (also String as long as binary has not been set at all)"

    ^ binary == true
        ifTrue:[ ByteArray ]
        ifFalse:[ String ]
!

onStreamPutBytes:count from:data
    "append the first count bytes of data (compressed output) to the
     underlying stream; reports a not-open error if there is none"

    onStream ifNil:[ self errorNotOpen ].
    onStream nextPutBytes:count from:data startingAt:1
! !

!CompressionStream methodsFor:'queries'!

atEnd
    "answer false only while hitEOF is false, that is after opening
     and before the end has been detected by a read operation or the
     stream was closed; answer true in every other case
     (including a receiver which was never opened)"

    ^ (hitEOF == false) not
!

canReadWithoutBlocking
    "returns true if data are available for reading;
     false if the stream is at end.
     updates the readLimit and position.

     If the uncompressed data in the output buffer is used up, more is
     uncompressed; whenever the uncompressor delivers nothing (0), the
     input buffer is refilled from the underlying stream first.
     Reports an error if the receiver is not opened for reading, or if
     the underlying stream ends while more input is needed"

    mode == #readonly ifFalse:[
        "reading is refused because the stream is a writeonly stream
         (a closed stream is reported as not open by #zerror:)"
        self errorWriteOnly
    ].
    hitEOF == true ifTrue:[ ^ false ].

    position >= readLimit ifTrue:[
        "buffer exhausted - uncompress the next portion"
        [ (readLimit := self zinflate) == 0 ] whileTrue:[ |n|
            "nothing delivered - the uncompressor needs more input"
            n := onStream nextBytes:(inputBytes size) into:inputBytes startingAt:1.

            (n notNil and:[n > 0]) ifFalse:[
                self pastEnd
            ].
            self zset_avail_in:n.
        ].
        readLimit ifNil:[
            "nil from zinflate: uncompressing is finished"
            hitEOF := true.
          ^ false
        ].
        position := 0.
    ].
    ^ true
!

isBinary
    "return true, if the stream is in binary (as opposed to text-) mode.
     The default when created is false.
     Answers the value of the binary instance variable as is; that is
     set to false when the stream is opened and changed by
     #binary and #text"

    ^ binary
!

isOpen
    "return true, if this stream is open,
     which is the case as long as it refers to an underlying stream
     (the reference is cleared when the zip-stream is closed)"

    ^ onStream notNil
!

isReadable
    "return true, if this stream can be read from,
     that is if it was opened with mode #readonly and is not closed"

    ^ mode == #readonly
!

isWritable
    "return true, if this stream can be written to,
     that is if it was opened with mode #writeonly and is not closed"

    ^ mode == #writeonly
! !

!CompressionStream methodsFor:'reading'!

contents
    "return the entire (remaining) uncompressed contents of the stream
     and close the stream.
     The result is a String, or a ByteArray in binary mode.
     Reports an error if the receiver is not opened for reading"

    |species stream bfsize buffer count|

    mode == #readonly ifFalse:[
        "reading is refused because the stream is a writeonly stream
         (a closed stream is reported as not open by #zerror:)"
        self errorWriteOnly
    ].
    bfsize := outputBytes size.

    species := self contentsSpecies.
    buffer  := species new:bfsize.
    stream  := (species new:bfsize) writeStream.

    [ self canReadWithoutBlocking ] whileTrue:[
        count := self z_nextAvailableInto:buffer startingAt:1.

        count == bfsize ifTrue:[
            "the transfer buffer is completely filled - append it as is"
            stream nextPutAll:buffer.
        ] ifFalse:[
            count > 0 ifTrue:[
                "append only the valid part of the transfer buffer"
                stream nextPutAll:(buffer copyFrom:1 to:count)
            ]
        ].
    ].
    self close.
    ^ stream contents
!

next
    "return the next element: a byte (an integer) in binary mode,
     a character in text mode.
     Return nil, if there are no more elements"

    |value|

    "at the end of the stream, there is nothing to return"
    self canReadWithoutBlocking ifFalse:[ ^ nil ].

    position := position + 1.
    value    := outputBytes at:position.

    ^ binary
        ifTrue:[ value ]
        ifFalse:[ Character value:value ]
!

next:n
    "return the next n elements of the stream as a String, or as a
     ByteArray in binary mode.
     If the stream ends before n elements have been read, a shorter
     collection with the elements read so far is returned; at the end of
     the stream that is an empty collection"

    |result filled species|

    species := self contentsSpecies.

    self canReadWithoutBlocking ifFalse:[ ^ species new ].

    result := species new:n.
    filled := 0.            "number of elements stored in result so far"

    [self canReadWithoutBlocking] whileTrue:[
        filled := filled + (self z_nextAvailableInto:result startingAt:(filled + 1)).
        filled >= n ifTrue:[ ^ result ]
    ].

    "the stream ended too early - answer what we have"
    ^ result copyFrom:1 to:filled
!

skip:count
    "skip count elements and return the receiver.
     Skipping zero elements does nothing; skipping backward is not
     supported and raises a position error.
     If the stream ends before count elements have been skipped,
     the receiver is left at its end"

    |remaining buffered|

    count <= 0 ifTrue:[
        "unreading is not possible"
        count == 0 ifFalse:[ PositionErrorSignal raiseRequest ].
        ^ self
    ].

    remaining := count.

    [self canReadWithoutBlocking] whileTrue:[
        buffered := readLimit - position.

        remaining <= buffered ifTrue:[
            "the rest to skip is completely in the buffer"
            position := position + remaining.
            ^ self
        ].

        "drop the whole buffer; the next check uncompresses more"
        remaining := remaining - buffered.
        position  := readLimit := 0.
    ].
! !

!CompressionStream methodsFor:'startup & release'!

close
    "close the zip-stream: mark the receiver as being at the end and,
     if the low level stream still exists, flush pending output
     and release it. Closing a closed stream does nothing more"

    hitEOF := true.
    zstream isNil ifTrue:[ ^ self ].

    self flush.
    self closeZStream.
!

openWithMode:aMode on:aStream
    "open the zip-stream on a stream
         #readonly    uncompress the data derived from the read-stream,  aStream
         #writeonly   compress   the data and write to the write-stream, aStream
     Every mode other than #readonly is handled like #writeonly.
     Allocates the two buffers (8192 bytes each), starts in text mode,
     opens the low level stream and registers the receiver for
     finalization. Reports a not-open error if aStream is nil"

    onStream := aStream.
    onStream ifNil:[ self errorNotOpen ].

    mode        := aMode.
    outputBytes := ExternalBytes unprotectedNew:8192.
    inputBytes  := ExternalBytes unprotectedNew:8192.
    position    := 0.
    readLimit   := 0.
    binary      := false.

    self zopen.
    self registerForFinalization.

    hitEOF := false.

    aMode == #readonly ifTrue:[
        self zinflateInit.
        ^ self
    ].
    self zdeflateInit
! !

!CompressionStream methodsFor:'writing'!

contents:contents
    "compress all elements of the collection contents, one by one
     via #nextPut:, and close the stream afterwards"

    contents do:[:eachElement |
        self nextPut:eachElement
    ].
    self close.
!

flush
    "compress what has been collected in the input buffer and write the
     compressed data to the underlying stream.
     Does nothing, if the receiver is not opened for writing"

    |pending|

    self isWritable ifFalse:[ ^ self ].

    "hand over the collected bytes and start with an empty input buffer"
    self zset_avail_in:position.
    position := 0.

    "write whatever is in the output buffer, then let the compressor
     continue; it answers false as soon as it has finished"
    [
        pending := self zget_avail_out.
        pending > 0 ifTrue:[
            self onStreamPutBytes:pending from:outputBytes
        ].
        self zdeflate
    ] whileTrue.
!

nextPut:aByteOrCharacter
    "write the argument, aByteOrCharacter.
     The element is only collected in the input buffer here; the buffer
     is compressed and written by #flush, which is done automatically
     as soon as the buffer is full.
     Reports an error if the receiver is not opened for writing"

    mode == #writeonly ifFalse:[
        "without this check, writing to a readonly stream would overwrite
         the compressed input and move the read position, and data written
         after close would be lost silently
         (a closed stream is reported as not open by #zerror:)"
        self errorReadOnly
    ].
    position == inputBytes size ifTrue:[self flush].

    "store first, so that position is not advanced if the store fails"
    inputBytes at:(position + 1) put:aByteOrCharacter asInteger.
    position := position + 1.
! !

!CompressionStream class methodsFor:'documentation'!

version
    "answer the revision string of the source file of this class
     (a CVS Header keyword string)"

    ^ '$Header: /cvs/stx/stx/libbasic2/CompressionStream.st,v 1.9 2003-05-06 13:59:54 stefan Exp $'
! !

CompressionStream initialize!