transforms/Xtreams__EncodeReadStream.st
author Jan Vrany <jan.vrany@fit.cvut.cz>
Wed, 01 Feb 2012 00:34:28 +0000
changeset 97 2a7827f4dce2
parent 90 59f68d289949
child 111 44ac233b2f83
permissions -rw-r--r--
pool name fixes

"{ Package: 'stx:goodies/xtreams/transforms' }"

"{ NameSpace: Xtreams }"

ReadStream subclass:#EncodeReadStream
	instanceVariableNames:'transparent crPreceeding encoder buffer bufferWriting
		bufferReading'
	classVariableNames:''
	poolDictionaries:'Xtreams::XtreamsPool'
	category:'Xtreams-Transforms'
!

EncodeReadStream comment:'Converts bytes into characters using pre-configured encoding. At the same time, if set to lineEndAuto (default) it can perform line-end translation, converting any line-end convention into CRs. The source stream must provide bytes (0...255).

Instance Variables
	transparent	<Boolean> should the stream perform line-end translations
	crPreceeding	<Boolean> was previous character read a CR (used when not transparent)
	encoder	<Encoder> converts bytes to characters
	buffer	<Buffer on: ByteArray> used to optimize bulk reads
	bufferWriting	<WriteStream> write stream on buffer
	bufferReading	<ReadStream> read stream on buffer

'
!


!EncodeReadStream class methodsFor:'instance creation'!

on: aSource encoding: anEncoding
	^self new on: aSource encoding: anEncoding
! !

!EncodeReadStream methodsFor:'accessing'!

encoder

	^encoder
!

get
	| character |
	buffer hasDataToRead ifTrue: [^super get].
	character := encoder decodeFrom: source.
	transparent ifFalse: 
		[character == LF
			ifTrue: [crPreceeding
				ifTrue: 
					[character := encoder decodeFrom: source.
					crPreceeding := character = CR]
				ifFalse: 
					[crPreceeding := false.
					character := CR]]
			ifFalse: [crPreceeding := character = CR]].
	^character
!

read: anInteger into: aSequenceableCollection at: startIndex

	| remaining position character bufferAvailable |
	remaining := anInteger.
	position := startIndex.
	[remaining > 0] whileTrue: [
		| mark |
		"Top up our buffer if we have room and we need data"
		[bufferWriting write: (buffer writeSize min: remaining) from: source] on: Incomplete do: [:incomplete |
			(incomplete count == 0 and: [buffer hasDataToRead not]) ifTrue: [
				(Incomplete on: aSequenceableCollection count: anInteger - remaining at: startIndex) raise]].

		"We now conduct an inner loop that iterates over the buffer data while:
			a) we need to read more data
			b) there is data available in the buffer
			c) a character can successfully be decoded
		"

		"If our buffer size is too low before we begin our decode loop, we need to take an undo copy in case we cannot decode a character."
		buffer readSize < 10 ifTrue:
			[mark := buffer readPosition.
			encoder backupState ].

		[["The following may raise an incomplete, which means we don't have enough data in the buffer to decode the full character.
		 This is handled by the Incomplete exception capture before."
		character := encoder decodeFrom: bufferReading.

		"If we are not transparent, convert stray LFs in to CRs and CRLFs in to CRs"
		transparent ifFalse: [
			character == LF
				ifTrue:	[character := crPreceeding ifTrue: [nil] ifFalse: [CR]. crPreceeding := false]
				ifFalse:	[crPreceeding := character = CR]].

		"If we didn't filter out an LF at the tail of a CRLF, commit the character to the output."
		character == nil ifFalse:
			[aSequenceableCollection at: position put: character.
			remaining := remaining - 1.
			position := position + 1].

		"Find out how much data we have left in the buffer. If it's too low we need to keep track of the undo record in case we cannot decode a character."
		(bufferAvailable := buffer readSize) < 10 ifTrue:
			[mark := buffer readPosition.
			encoder backupState ].

		remaining > 0 and: [bufferAvailable > 0]] whileTrue]
			on: Incomplete do: [:incomplete |
				"We failed to decode a character, we've hit the end of the buffer and need to refill it. We rewind the buffer and leave the decoding loop
				 to return to the main loop where more data will be fetched in to our buffer."
				buffer readPosition: mark.
				encoder restoreState]].
	^anInteger
! !

!EncodeReadStream methodsFor:'initialize-release'!

close
	super close.
	buffer recycle.
	buffer := nil
!

contentsSpecies
        
        ^encoder contentsSpecies
!

on: aSource encoding: anEncoding

	super on: aSource.
	encoder := Encoder for: anEncoding.
	buffer := RingBuffer new: DefaultBufferSize class: ByteArray.
	bufferReading := buffer reading.
	bufferWriting := buffer writing.
	transparent := false.
	crPreceeding := false.
! !

!EncodeReadStream methodsFor:'line-end'!

setLineEndAuto

	transparent := false
!

setLineEndTransparent

	transparent := true
! !

!EncodeReadStream class methodsFor:'documentation'!

version_SVN
    ^ '$Id$'
! !