transforms/Xtreams__EncodeReadStream.st
author Jan Vrany <jan.vrany@fit.cvut.cz>
Wed, 01 Feb 2012 00:34:28 +0000
changeset 97 2a7827f4dce2
parent 90 59f68d289949
child 111 44ac233b2f83
permissions -rw-r--r--
pool name fixes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
     1
"{ Package: 'stx:goodies/xtreams/transforms' }"
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
     2
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
     3
"{ NameSpace: Xtreams }"
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
     4
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
     5
"Class definition: a ReadStream subclass living in the Xtreams-Transforms category.
 The instance variables are described in the class comment."
ReadStream subclass:#EncodeReadStream
	instanceVariableNames:'transparent crPreceeding encoder buffer bufferWriting
		bufferReading'
	classVariableNames:''
	poolDictionaries:'Xtreams::XtreamsPool'
	category:'Xtreams-Transforms'
10
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    11
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    12
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    13
EncodeReadStream comment:'Converts bytes into characters using pre-configured encoding. At the same time, if set to lineEndAuto (default) it can perform line-end translation, converting any line-end convention into CRs. The source stream must provide bytes (0...255).
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    14
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    15
Instance Variables
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    16
	transparent	<Boolean> should the stream perform line-end translations
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    17
	crPreceeding	<Boolean> was previous character read a CR (used when not transparent)
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    18
	encoder	<Encoder> converts bytes to characters
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    19
	buffer	<Buffer on: ByteArray> used to optimize bulk reads
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    20
	bufferWriting	<WriteStream> write stream on buffer
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    21
	bufferReading	<ReadStream> read stream on buffer
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    22
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    23
'
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    24
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    25
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    26
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    27
!EncodeReadStream class methodsFor:'instance creation'!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    28
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    29
on: aSource encoding: anEncoding
	"Instance creation: answer the result of initializing a fresh instance
	 on aSource with anEncoding."

	^(self new)
		on: aSource
		encoding: anEncoding
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    31
! !
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    32
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    33
!EncodeReadStream methodsFor:'accessing'!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    34
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    35
encoder
	"Answer the encoder held by the receiver; it is the object asked to
	 decode characters from the byte source."

	^encoder
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    38
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    39
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    40
get
	"Answer the next decoded character.
	 If the internal buffer still holds bytes, defer to the inherited get
	 (presumably routed through read:into:at: so buffered bytes are consumed first - TODO confirm).
	 Otherwise decode directly from the source.
	 Unless transparent, line ends are normalized exactly like read:into:at: does:
	 a CRLF pair is delivered as one CR and a stray LF is delivered as CR.
	 Any Incomplete raised by the decoder propagates to the caller."

	| character |
	buffer hasDataToRead ifTrue: [^super get].
	character := encoder decodeFrom: source.
	transparent ifTrue: [^character].

	"An LF directly after a delivered CR is the tail of a CRLF pair: drop it and
	 decode the following character instead. The flag is cleared before decoding so
	 that it is not left stale if the decoder raises after the LF was consumed."
	(crPreceeding and: [character == LF]) ifTrue:
		[crPreceeding := false.
		character := encoder decodeFrom: source].

	"Translate whatever is delivered now. This also covers the character that follows
	 a dropped LF, so CR LF LF yields CR CR rather than CR LF."
	character == LF
		ifTrue:
			[crPreceeding := false.
			character := CR]
		ifFalse: [crPreceeding := character = CR].
	^character
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    55
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    56
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
    57
read: anInteger into: aSequenceableCollection at: startIndex
	"Decode anInteger characters from the source bytes into aSequenceableCollection,
	 storing them from startIndex on, and answer anInteger.
	 Unless transparent, CRLF pairs and stray LFs are stored as single CRs.
	 Raises Incomplete, carrying the number of characters stored so far, when the source
	 ends before anInteger characters were decoded. This includes a source that ends in
	 the middle of a multi-byte character; previously that case looped forever."

	| remaining position character bufferAvailable |
	remaining := anInteger.
	position := startIndex.
	[remaining > 0] whileTrue: [
		| mark sourceStarved |
		sourceStarved := false.
		"Top up our buffer if we have room and we need data"
		[bufferWriting write: (buffer writeSize min: remaining) from: source] on: Incomplete do: [:incomplete |
			incomplete count == 0 ifTrue: [
				buffer hasDataToRead ifFalse: [
					(Incomplete on: aSequenceableCollection count: anInteger - remaining at: startIndex) raise].
				"The source delivered nothing, so the bytes left in the buffer are all we will ever get."
				sourceStarved := true]].

		"We now conduct an inner loop that iterates over the buffer data while:
			a) we need to read more data
			b) there is data available in the buffer
			c) a character can successfully be decoded
		"

		"If our buffer size is too low before we begin our decode loop, we need to take an undo copy in case we cannot decode a character."
		buffer readSize < 10 ifTrue:
			[mark := buffer readPosition.
			encoder backupState ].

		[["The following may raise an Incomplete, which means we don't have enough data in the buffer to decode the full character.
		 This is handled by the Incomplete handler attached to this loop, below."
		character := encoder decodeFrom: bufferReading.

		"If we are not transparent, convert stray LFs into CRs and CRLFs into CRs"
		transparent ifFalse: [
			character == LF
				ifTrue:	[character := crPreceeding ifTrue: [nil] ifFalse: [CR]. crPreceeding := false]
				ifFalse:	[crPreceeding := character = CR]].

		"If we didn't filter out an LF at the tail of a CRLF, commit the character to the output."
		character == nil ifFalse:
			[aSequenceableCollection at: position put: character.
			remaining := remaining - 1.
			position := position + 1].

		"Find out how much data we have left in the buffer. If it's too low we need to keep track of the undo record in case we cannot decode a character."
		(bufferAvailable := buffer readSize) < 10 ifTrue:
			[mark := buffer readPosition.
			encoder backupState ].

		remaining > 0 and: [bufferAvailable > 0]] whileTrue]
			on: Incomplete do: [:incomplete |
				"We failed to decode a character, we've hit the end of the buffer and need to refill it. We rewind the buffer and leave the decoding loop
				 to return to the main loop where more data will be fetched into our buffer."
				buffer readPosition: mark.
				encoder restoreState.
				"If the source had nothing more to give, refilling cannot help: the input ends in a
				 truncated character. Report what was decoded instead of retrying forever."
				sourceStarved ifTrue: [
					(Incomplete on: aSequenceableCollection count: anInteger - remaining at: startIndex) raise]]].
	^anInteger
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   108
! !
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   109
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   110
!EncodeReadStream methodsFor:'initialize-release'!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   111
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   112
close
	"Close the receiver via the inherited close, then hand the byte buffer back
	 with recycle and drop the reference to it.
	 The buffer release is skipped when the buffer is already gone, so a repeated
	 close no longer sends recycle to nil."

	super close.
	buffer == nil ifTrue: [^self].
	buffer recycle.
	buffer := nil
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   116
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   117
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   118
contentsSpecies
        "Answer the collection class for the receiver's contents; the decision
         is delegated entirely to the encoder."

        ^encoder contentsSpecies
10
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   121
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   122
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   123
on: aSource encoding: anEncoding
	"Initialize the receiver on aSource, decoding with the encoder obtained for anEncoding.
	 A ring buffer of DefaultBufferSize bytes is allocated together with its reading and
	 writing streams. The receiver starts with line-end translation enabled
	 (transparent is false) and with no CR pending."

	super on: aSource.

	"line-end state"
	crPreceeding := false.
	transparent := false.

	"decoding machinery"
	encoder := Encoder for: anEncoding.
	buffer := RingBuffer new: DefaultBufferSize class: ByteArray.
	bufferReading := buffer reading.
	bufferWriting := buffer writing
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   132
! !
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   133
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   134
!EncodeReadStream methodsFor:'line-end'!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   135
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   136
setLineEndAuto
	"Enable line-end translation: CRLF pairs and stray LFs are delivered as CRs.
	 The crPreceeding flag is only maintained while translation is enabled, so when
	 coming back from transparent mode it may describe a CR seen long ago. It is reset
	 on that transition only; a redundant send while already in auto mode must not
	 disturb a CRLF pair that is currently being processed."

	transparent == true ifTrue: [crPreceeding := false].
	transparent := false
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   139
!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   140
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   141
setLineEndTransparent
	"Disable line-end translation: decoded characters are delivered unchanged,
	 CRs and LFs included."

	transparent := true
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   144
! !
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   145
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   146
!EncodeReadStream class methodsFor:'documentation'!
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   147
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   148
version_SVN
    "Answer the version-control keyword string for this class."

    ^ '$Id$'
3813193bdf4e first cut
Martin Kobetic <mkobetic@gmail.com>
parents:
diff changeset
   150
! !