ReadStream.st
author Claus Gittinger <cg@exept.de>
Wed, 15 May 1996 17:30:49 +0200
changeset 1400 0ee12d945849
parent 1295 83f594f05c52
child 1406 fd7eed0e648f
permissions -rw-r--r--
checkin from browser

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"define ReadStream as a subclass of PositionableStream in the category Streams;
 it declares no instance variables, class variables or pool dictionaries of its own"
PositionableStream subclass:#ReadStream
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Streams'
!

!ReadStream class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    ReadStream defines protocol for reading streamwise over collections.

    Most of the reading and skipping methods in this class (#next, #peek,
    #nextPeek, #nextAlphaNumericWord, #nextSymbol, #nextDecimalInteger,
    #skipSeparators, #skipSeparatorsExceptCR and #skipThrough:) contain
    inline C code which handles the common case of a stream over a String
    directly (#next and #peek also handle ByteArray and Array).
    Whenever the inline code does not apply, control falls through to
    Smalltalk code or to the inherited implementation.

    Write access (#nextPut:) is caught and reported as an error.

    [author:]
        Claus Gittinger

"
! !

!ReadStream methodsFor:'converting'!

readStream
    "return a readStream from the receiver. Since this is already
     a readStream, return self.
     Notice that no copy is made: the returned stream IS the receiver,
     so both share the same read position."

    ^ self
! !

!ReadStream methodsFor:'emphasis'!

emphasis
    "return the emphasis of the current element (i.e. the one which will be
     returned by the next #next).
     The query is forwarded to the underlying collection (#emphasisAt:),
     passing the current read position.
     Streams on a string will return nil for all elements.
     Streams on collections which know nothing at all about emphasis
     will report an error."

    ^ collection emphasisAt:position.

    "
     |t s|

     t := 'hello world' asText
                emphasizeFrom:1 to:5 with:#bold;
                emphasizeFrom:7 to:11 with:#italic.

     s := t readStream.
     [s atEnd] whileFalse:[
        Transcript show:(s emphasis); show:' '.
        Transcript show:''''; show:(s next); showCr:''''.
     ].
    "

    "Modified: 15.5.1996 / 17:30:33 / cg"
! !

!ReadStream methodsFor:'queries'!

isReadable
    "return true - a readStream can always be read from"

    ^ true
! !

!ReadStream methodsFor:'reading'!

next
    "answer the element at the current read position and advance the
     read pointer by one.
     If the read position is beyond the readLimit, the value of
     #pastEnd is answered instead.
     - streams on String, ByteArray and Array are served by the inline
       C code; every other case is handled by the Smalltalk code below it"

    |element|

%{  /* NOCONTEXT */

    OBJ src, posObj, limObj;

    src = __INST(collection);
    posObj = __INST(position);
    limObj = __INST(readLimit);

    if (__isNonNilObject(src) && __bothSmallInteger(posObj, limObj)) {
        REGISTER int idx;
        unsigned c;
        OBJ srcClass;

        idx = __intVal(posObj);
        if ((idx >= 1) && (idx <= __intVal(limObj))) {
            srcClass = __qClass(src);

            if (srcClass == @global(String)) {
                /* string: answer the character found at idx */
                if (idx <= __stringSize(src)) {
                    c = __stringVal(src)[idx-1];
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( __MKCHARACTER(c) );
                }
            } else if (srcClass == @global(ByteArray)) {
                /* byteArray: answer the byte as a small integer */
                if (idx <= __byteArraySize(src)) {
                    c = __ByteArrayInstPtr(src)->ba_element[idx-1];
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( __MKSMALLINT(c) );
                }
            } else if (srcClass == @global(Array)) {
                /* array: answer the stored object itself */
                if (idx <= __arraySize(src)) {
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( __ArrayInstPtr(src)->a_element[idx-1]);
                }
            }
        }
    }
    /* anything else: let the Smalltalk code below do it */
%}
.
    (position > readLimit) ifTrue:[^ self pastEnd].
    element := collection at:position.
    position := position + 1.
    ^ element
!

nextAlphaNumericWord
    "skip leading whitespace, then read the next word, i.e. all characters
     up to (but excluding) the first one which is neither a letter nor a digit.
     Return a string containing those characters, or nil if no letter
     or digit was found at that place.
     - tuned for speed on String-Streams for faster scanning;
       other streams are handled by the inherited implementation.
     The inline code collects at most 255 characters per call; a longer
     word is delivered in pieces by consecutive calls."
%{
    /* speedup, if collection is a string */

    int pos, limit, sz;
    int len;
    char buffer[256];
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);
    
    if (__isString(coll) && __bothSmallInteger(p, l)) {

	pos = __intVal(p);
	if (pos <= 0) {
	    /* a non-positive position would make cp point in front of the
	       characters; handled like in skipSeparators */
	    RETURN ( nil );
	}
	limit = __intVal(l);
	sz = __qSize(coll) - OHDR_SIZE;
	if (sz < limit)
	    limit = sz; 
	cp = __stringVal(coll) + pos - 1;

	/* phase 1: skip leading whitespace */
	for (;;) {
	    if (pos > limit) break;
	    ch = *cp;

	    if (ch > ' ') break;
	    if ((ch != ' ') && (ch != '\t') && (ch != '\r')
	     && (ch != '\n') && (ch != 0x0b)) break;
	    cp++;
	    pos++;
	}

	/* phase 2: collect letters and digits */
	len = 0;
	for (;;) {
	    if (pos > limit) break;
	    ch = *cp & 0xFF;

	    if (! (((ch >= 'a') && (ch <= 'z')) ||
		   ((ch >= 'A') && (ch <= 'Z')) ||
		   ((ch >= '0') && (ch <= '9'))))
		break;
	    buffer[len++] = ch;
	    /* consume the character BEFORE checking for a full buffer;
	       otherwise the character stored last would be delivered
	       a second time by the next read */
	    pos++;
	    cp++;
	    if (len >= (sizeof(buffer)-1)) {
		/* emergency: buffer is full - return what we have so far */
		break;
	    }
	}

	__INST(position) = __MKSMALLINT(pos);
	buffer[len] = '\0';
	RETURN ( (len != 0) ? __MKSTRING_L(buffer, len COMMA_CON) : nil );
    }
%}
.
    ^ super nextAlphaNumericWord
!

nextDecimalInteger
    "read the next integer in radix 10. dont skip whitespace.
     Return the integer; 0 is returned if there is no digit at the
     read position. The stream is left positioned at the first non-digit.
     - tuned for speed on String-Streams for faster scanning.
     The inline code accumulates digits as long as the value is guaranteed
     to fit into a SmallInteger; the remaining digits (and non-string
     streams) are handled by the Smalltalk code below."

    |value nextOne|
%{
    int pos, limit, sz;
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    int val = 0;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);
    
    /* a non-positive position would make cp point in front of the
       characters; leave that case to the Smalltalk code */
    if (__isString(coll) && __bothSmallInteger(p, l) && (__intVal(p) > 0)) {

	pos = __intVal(p);
	limit = __intVal(l);
	sz = __qSize(coll) - OHDR_SIZE;
	if (sz < limit)
	    limit = sz; 
	cp = __stringVal(coll) + pos - 1;

	for (;;) {
	    if (pos > limit) break;
	    ch = *cp;

	    if ((ch < '0') || (ch > '9')) break;
	    if (val > ((_MAX_INT - 9) / 10)) {
		/* adding one more digit could exceed the SmallInteger range.
		   Remember how far we got (this digit is NOT yet consumed)
		   and let the Smalltalk code continue from there */
		__INST(position) = __MKSMALLINT(pos);
		goto oops;
	    }
	    val = val * 10 + (ch - '0');
	    pos++;
	    cp++;
	}
	__INST(position) = __MKSMALLINT(pos);
	RETURN ( __MKSMALLINT(val) );
    }
oops:
    /* val is 0 for non-string streams, or the partial value collected so far */
    value = __MKSMALLINT(val);
%}
.
    "fall-back for non-string streams - we have to continue where
     above primitive left off, in case of a large integer ...
     (instead of doing a super nextDecimalInteger)"

    nextOne := self peek.
    [nextOne notNil and:[nextOne isDigitRadix:10]] whileTrue:[
	value := (value * 10) + nextOne digitValue.
	nextOne := self nextPeek
    ].
    ^ value
!

nextPeek
    "step the read pointer forward by one and answer the element which
     is found there, without consuming it; same effect as (self next; peek).
     If the read position is (or becomes) greater than the readLimit,
     the value of #pastEnd is answered.
     - streams on a String are served by the inline C code"

%{  /* NOCONTEXT */
    OBJ src, posObj, limObj;

    src = __INST(collection);
    posObj = __INST(position);
    limObj = __INST(readLimit);

    if (__isString(src) && __bothSmallInteger(posObj, limObj)) {
        REGISTER int idx;
        unsigned c;

        idx = __intVal(posObj);
        /* only if both the current and the following position are valid */
        if ((idx > 0) && (idx < __intVal(limObj)) && (idx < __stringSize(src))) {
            idx = idx + 1;
            c = __stringVal(src)[idx-1];
            __INST(position) = __MKSMALLINT(idx);
            RETURN ( __MKCHARACTER(c) );
        }
    }
    /* anything else: let the Smalltalk code below do it */
%}
.
    (position > readLimit) ifTrue:[^ self pastEnd].
    position := position + 1.
    (position > readLimit) ifTrue:[^ self pastEnd].
    ^ collection at:position
!

nextSymbol
    "read the next selector-symbol, i.e. all characters up to (but
     excluding) the first one which is neither a letter, a digit nor a colon.
     No whitespace is skipped.
     Return a string containing those characters, or nil if there is
     no such character at the read position.
     - tuned for speed on String-Streams for faster scanning;
       other streams are handled by the inherited implementation.
     The inline code collects at most 255 characters per call; a longer
     symbol is delivered in pieces by consecutive calls."
%{
    int pos, limit, sz;
    int len;
    char buffer[256];
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    if (__isString(coll) && __bothSmallInteger(p, l)) {

	pos = __intVal(p);
	if (pos <= 0) {
	    /* a non-positive position would make cp point in front of the
	       characters; handled like in skipSeparators */
	    RETURN ( nil );
	}
	limit = __intVal(l);
	sz = __qSize(coll) - OHDR_SIZE;
	if (sz < limit)
	    limit = sz; 
	cp = __stringVal(coll) + pos - 1;

	len = 0;
	for (;;) {
	    if (pos > limit) break;
	    ch = *cp;

	    if (! (((ch >= 'a') && (ch <= 'z')) ||
		   ((ch >= 'A') && (ch <= 'Z')) ||
		   ((ch >= '0') && (ch <= '9')) ||
		   (ch == ':')))
		break;
	    buffer[len++] = ch;
	    /* consume the character BEFORE checking for a full buffer;
	       otherwise the character stored last would be delivered
	       a second time by the next read */
	    pos++;
	    cp++;
	    if (len >= (sizeof(buffer)-1)) {
		/* emergency: buffer is full - return what we have so far */
		break;
	    }
	}

	__INST(position) = __MKSMALLINT(pos);
	buffer[len] = '\0';
	RETURN ( (len != 0) ? __MKSTRING_L(buffer, len COMMA_CON) : nil );
    }
%}
.
    ^ super nextSymbol
!

peek
    "answer the element at the current read position WITHOUT advancing
     the read pointer.
     If the read position is beyond the readLimit, the value of
     #pastEnd is answered instead.
     - streams on String, ByteArray and Array are served by the inline
       C code; every other case is handled by the Smalltalk code below it"

%{  /* NOCONTEXT */

    OBJ src, posObj, limObj;

    src = __INST(collection);
    posObj = __INST(position);
    limObj = __INST(readLimit);

    if (__isNonNilObject(src) && __bothSmallInteger(posObj, limObj)) {
        REGISTER int idx;
        unsigned c;
        OBJ srcClass;

        idx = __intVal(posObj);
        if ((idx <= __intVal(limObj)) && (idx >= 1)) {
            srcClass = __qClass(src);

            if (srcClass == @global(String)) {
                /* string: answer the character found at idx */
                if (idx <= __stringSize(src)) {
                    c = __stringVal(src)[idx-1];
                    RETURN ( __MKCHARACTER(c) );
                }
            } else if (srcClass == @global(ByteArray)) {
                /* byteArray: answer the byte as a small integer */
                if (idx <= __byteArraySize(src)) {
                    c = __ByteArrayInstPtr(src)->ba_element[idx-1];
                    RETURN ( __MKSMALLINT(c) );
                }
            } else if (srcClass == @global(Array)) {
                /* array: answer the stored object itself */
                if (idx <= __arraySize(src)) {
                    RETURN ( __ArrayInstPtr(src)->a_element[idx-1]);
                }
            }
        }
    }
    /* anything else: let the Smalltalk code below do it */
%}
.
    (position > readLimit) ifTrue:[^ self pastEnd].
    ^ collection at:position
!

skipSeparators
    "advance the read pointer over whitespace (space, tab, return, newline
     and vertical tab in the inline code), so that the next read returns
     the first non-whitespace element.
     Answer that element (without consuming it), or nil if the end of the
     stream was reached while skipping.
     - streams on a String are served by the inline C code; other
       streams are handled by the inherited implementation"

%{  /* NOCONTEXT */
    OBJ src, posObj, limObj;

    src = __INST(collection);
    posObj = __INST(position);
    limObj = __INST(readLimit);

    if (__isString(src) && __bothSmallInteger(posObj, limObj)) {
	REGISTER unsigned char *cp;
	REGISTER unsigned c;
	REGISTER int idx;
	int last;

	idx = __intVal(posObj);
	if (idx <= 0) {
	    RETURN ( nil );
	}

	/* never scan beyond the end of the collection */
	last = __intVal(limObj);
	if (last > (__qSize(src) - OHDR_SIZE))
	    last = __qSize(src) - OHDR_SIZE;

	cp = (unsigned char *)(__stringVal(src) + idx - 1);
	for ( ; idx <= last; idx++) {
	    c = *cp++;
	    if ((c == ' ') || (c == '\t') || (c == '\r')
	     || (c == '\n') || (c == 0x0B)) {
		continue;
	    }
	    /* first non-whitespace character: stop in front of it */
	    __INST(position) = __MKSMALLINT(idx);
	    RETURN ( __MKCHARACTER(c) );
	}
	/* nothing but whitespace up to the end */
	__INST(position) = __MKSMALLINT(idx);
	RETURN ( nil );
    }
%}
.
    ^ super skipSeparators
!

skipSeparatorsExceptCR
    "advance the read pointer over whitespace, but stop at a newline
     (the inline code skips space, tab, return and vertical tab only);
     the next read returns the first element which was not skipped.
     Answer that element (without consuming it), or nil if the end of the
     stream was reached while skipping.
     - streams on a String are served by the inline C code; other
       streams are handled by the inherited implementation"

%{  /* NOCONTEXT */

    OBJ src, posObj, limObj;

    src = __INST(collection);
    posObj = __INST(position);
    limObj = __INST(readLimit);

    if (__isString(src) && __bothSmallInteger(posObj, limObj)) {
	REGISTER unsigned char *cp;
	REGISTER unsigned c;
	REGISTER int idx;
	int last;

	idx = __intVal(posObj);
	if (idx <= 0) {
	    RETURN ( nil );
	}

	/* never scan beyond the end of the collection */
	last = __intVal(limObj);
	if (last > (__qSize(src) - OHDR_SIZE))
	    last = __qSize(src) - OHDR_SIZE;

	cp = (unsigned char *)(__stringVal(src) + idx - 1);
	for ( ; idx <= last; idx++) {
	    c = *cp++;
	    if ((c == ' ') || (c == '\t') || (c == '\r') || (c == 0x0B)) {
		continue;
	    }
	    /* first character which is not skipped: stop in front of it */
	    __INST(position) = __MKSMALLINT(idx);
	    RETURN ( __MKCHARACTER(c) );
	}
	/* nothing but skipped whitespace up to the end */
	__INST(position) = __MKSMALLINT(idx);
	RETURN ( nil );
    }
%}
.
    ^ super skipSeparatorsExceptCR
!

skipThrough:anObject
    "skip all objects up-to and including anObject, return anObject on success,
     nil if end-of-stream is reached before. 
     On success, the next read operation will return the element after anObject.
     If anObject is not found, the inline code leaves the stream positioned
     at its end.
     - reimplemented for speed on String-Streams for faster scanning;
       the inline code is used if the collection is a String and anObject
       is a Character - all other cases are handled by the inherited
       implementation"

%{  /* NOCONTEXT */
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    if (__isString(coll)
     && __isCharacter(anObject)
     && __bothSmallInteger(p, l)) {
	REGISTER unsigned char *chars;
	REGISTER int pos, limit;
	unsigned ch;
	int sz;

	pos = __intVal(p);
	if (pos <= 0) {
	    RETURN ( nil );
	}

	/* never scan beyond the end of the collection */
	limit = __intVal(l);
	sz = __stringSize(coll);
	if (limit > sz) limit = sz;

	chars = (unsigned char *)(__stringVal(coll) + pos - 1);
	ch = __intVal(_characterVal(anObject)) & 0xFF;

	/* positions are 1-based and limit is the index of the last
	   readable element, so the element at limit must be checked too */
	while (pos <= limit) {
	    if (*chars == ch) {
		/* found: continue reading behind the matched character */
		__INST(position) = __MKSMALLINT(pos + 1);
		RETURN ( anObject );
	    }
	    chars++;
	    pos++;
	}
	/* not found: everything has been skipped */
	__INST(position) = __MKSMALLINT(pos);
	RETURN ( nil );
    }
%}
.
    ^ super skipThrough:anObject
!

skipToAll:aCollection
    "skip forward to the next occurrence of the element sequence given
     by the argument, aCollection.
     Answer the receiver if the sequence was found; the stream is then
     positioned in front of it, so the next reads return the elements
     of aCollection.
     Answer nil if it was not found; the stream is then reset to the
     position it had on entry."

    |startPos patternSize firstElement chunk matchIdx|

    startPos := self position.
    patternSize := aCollection size.
    firstElement := aCollection at:1.

    [self atEnd] whileFalse:[
	"compare the next patternSize elements against the pattern"
	chunk := self next:patternSize.
	chunk = aCollection ifTrue:[
	    "found - step back in front of the match"
	    self position:(self position - patternSize).
	    ^ self
	].
	"no match - if the patterns first element occurs later in the chunk,
	 the next attempt must start there; otherwise continue behind the chunk"
	matchIdx := chunk indexOf:firstElement startingAt:2.
	matchIdx ~~ 0 ifTrue:[
	    self position:(self position - patternSize + matchIdx - 1)
	]
    ].

    self position:startPos.
    ^ nil

    "
     |s|
     s := ReadStream on:'12345678901234567890'.
     s skipToAll:'901'.
     s next:4
    "
! !

!ReadStream methodsFor:'writing'!

nextPut:anElement
    "catch write access to readstreams - report an error.
     The argument is not looked at and nothing is stored;
     the error is raised via #shouldNotImplement."

    self shouldNotImplement
! !

!ReadStream class methodsFor:'documentation'!

version
    "return the revision control (CVS header) identification string of this source file"

    ^ '$Header: /cvs/stx/stx/libbasic/ReadStream.st,v 1.27 1996-05-15 15:30:49 cg Exp $'
! !