ReadStr.st
author Claus Gittinger <cg@exept.de>
Fri, 13 Sep 1996 18:49:38 +0200
changeset 1665 928e9a308cea
parent 1488 f69b3de1b9d3
child 1688 8a42db1eea60
permissions -rw-r--r--
added ExternalStream compatibility protocol

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"ReadStream is declared as a subclass of PositionableStream in category 'Streams'.
 It adds no instance variables, class variables or pool dictionaries of its own."

PositionableStream subclass:#ReadStream
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Streams'
!

!ReadStream  class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    ReadStream defines protocol for reading streamwise over collections.

    Several of the reading methods (#next, #peek, #nextPeek,
    #nextAlphaNumericWord, #nextSymbol, #nextDecimalInteger,
    #skipSeparators, #skipSeparatorsExceptCR and #skipThrough:)
    contain inline C code which handles streams on Strings directly
    (#next and #peek also handle ByteArrays and Arrays).
    For all other collections, these methods continue with Smalltalk code
    or with the inherited implementation.

    Write access via #nextPut: is caught and reported as an error.

    [author:]
        Claus Gittinger

"
! !

!ReadStream methodsFor:'converting'!

readStream
    "return a readStream from the receiver.
     The receiver already is a readStream, so no new stream is created;
     the receiver itself is returned."

    ^ self
! !

!ReadStream methodsFor:'emphasis'!

emphasis
    "return the emphasis of the current (i.e. next returned by #next)
     element. The query is forwarded to the underlying collection
     via #emphasisAt: with the current position.
     Streams on a string will return nil for all elements.
     Streams on collections which know nothing at all about emphasis
     will report an error."

    ^ collection emphasisAt:position.

    "
     |t s|

     t := 'hello world' asText
                emphasizeFrom:1 to:5 with:#bold;
                emphasizeFrom:7 to:11 with:#italic.

     s := t readStream.
     [s atEnd] whileFalse:[
        Transcript show:(s emphasis); show:' '.
        Transcript show:''''; show:(s next); showCR:''''.
     ].
    "

    "Modified: 15.5.1996 / 17:30:33 / cg"
! !

!ReadStream methodsFor:'queries'!

isReadable
    "return true - a ReadStream can always be read from"

    ^ true
!

size
    "return the size of the underlying collection
     (as reported by the collection itself; the readLimit is not consulted here)"

    ^ collection size

    "Created: 13.9.1996 / 18:14:35 / cg"
! !

!ReadStream methodsFor:'reading'!

next
    "answer the element at the current read position and advance
     the position by one.
     If the position is beyond the readLimit, the result of #pastEnd is answered.
     - collections whose class is exactly String, ByteArray or Array
       are served directly by the inline C code below;
       everything else is handled by the Smalltalk code after it"

    |element|

%{  /* NOCONTEXT */

    OBJ theCollection, collClass, posObj, limitObj, fetched;
    REGISTER int idx;
    unsigned byteVal;

    theCollection = __INST(collection);
    posObj = __INST(position);
    limitObj = __INST(readLimit);

    if (__isNonNilObject(theCollection) && __bothSmallInteger(posObj, limitObj)) {
        idx = __intVal(posObj);

        /* only positions within 1 .. readLimit are handled here */
        if ((idx >= 1) && (idx <= __intVal(limitObj))) {
            collClass = __qClass(theCollection);

            if (collClass == @global(String)) {
                if (idx <= __stringSize(theCollection)) {
                    byteVal = __stringVal(theCollection)[idx - 1];
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( __MKCHARACTER(byteVal) );
                }
            } else if (collClass == @global(ByteArray)) {
                if (idx <= __byteArraySize(theCollection)) {
                    byteVal = __ByteArrayInstPtr(theCollection)->ba_element[idx - 1];
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( __MKSMALLINT(byteVal) );
                }
            } else if (collClass == @global(Array)) {
                if (idx <= __arraySize(theCollection)) {
                    fetched = __ArrayInstPtr(theCollection)->a_element[idx - 1];
                    __INST(position) = __MKSMALLINT(idx + 1);
                    RETURN ( fetched );
                }
            }
        }
    }
%}
.
    "general case: any other collection, or an index the primitive did not accept"
    (position > readLimit) ifTrue:[^ self pastEnd].
    element := collection at:position.
    position := position + 1.
    ^ element
!

nextAlphaNumericWord
    "skip leading whitespace, then read the next word (i.e. up to the
     first non letter-or-digit).
     Return a string containing those characters, or nil if no letter
     or digit was found at that place.
     - tuned for speed on String-Streams for faster scanning.
       The inline C code only recognizes a-z, A-Z and 0-9 and returns
       at most 255 characters per call; longer words are continued
       by the next call.
       Non-string streams and invalid positions are passed on to the
       inherited implementation."
%{
    /* speedup, if collection is a string */

    int pos, limit, sz;
    int len;
    char buffer[256];
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    /* a position below 1 would address memory in front of the characters */
    if (__isString(coll) && __bothSmallInteger(p, l) && (__intVal(p) > 0)) {

        pos = __intVal(p);
        limit = __intVal(l);
        sz = __qSize(coll) - OHDR_SIZE;
        if (sz < limit)
            limit = sz;
        cp = (unsigned char *)(__stringVal(coll) + pos - 1);

        /* skip over leading whitespace */
        for (;;) {
            if (pos > limit) break;
            ch = *cp;

            if (ch > ' ') break;
            if ((ch != ' ') && (ch != '\t') && (ch != '\r')
             && (ch != '\n') && (ch != 0x0b)) break;
            cp++;
            pos++;
        }

        /* collect letters and digits */
        len = 0;
        for (;;) {
            if (pos > limit) break;
            ch = *cp & 0xFF;

            if (! (((ch >= 'a') && (ch <= 'z')) ||
                   ((ch >= 'A') && (ch <= 'Z')) ||
                   ((ch >= '0') && (ch <= '9'))))
                break;
            buffer[len++] = ch;
            /* consume the character BEFORE the buffer-full check;
               otherwise the character just stored would be read again */
            pos++;
            cp++;
            if (len >= (sizeof(buffer)-1)) {
                /* emergency - buffer is full; leave room for the 0-byte */
                break;
            }
        }

        __INST(position) = __MKSMALLINT(pos);
        buffer[len] = '\0';
        RETURN ( (len != 0) ? __MKSTRING_L(buffer, len COMMA_CON) : nil );
    }
%}
.
    ^ super nextAlphaNumericWord
!

nextByte
    "return the next element as a byteValued integer (0..255).
     Return nil if #next delivered nil (i.e. there was no next element).
     Report an error and return nil, if the next element is not an
     integer in the range 0..255."

    |ret|

    ret := self next.
    ret isNil ifTrue:[
        "no element available - do not try to compare nil against numbers"
        ^ nil
    ].
    "the isInteger check keeps non-numeric elements (e.g. characters)
     away from the range comparison, which they would not understand"
    (ret isInteger and:[ret between:0 and:255]) ifFalse:[
        self error:'oops - not a byte value in stream'.
        ^ nil
    ].
    ^ ret

    "Created: 13.9.1996 / 18:10:38 / cg"
!

nextDecimalInteger
    "read the next integer in radix 10. dont skip whitespace.
     Digits are consumed up to the first non-digit (or the end);
     0 is returned if there is no digit at the current position.
     - tuned for speed on String-Streams for faster scanning.
       The inline C code accumulates the value as long as it is
       guaranteed to fit into a SmallInteger; for bigger numbers,
       for non-string streams and for invalid positions, the
       Smalltalk code below continues from the current position."

    |value nextOne|
%{
    int pos, limit, sz;
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    int val = 0;
    int overflow = 0;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    /* a position below 1 would address memory in front of the characters */
    if (__isString(coll) && __bothSmallInteger(p, l) && (__intVal(p) > 0)) {

        pos = __intVal(p);
        limit = __intVal(l);
        sz = __qSize(coll) - OHDR_SIZE;
        if (sz < limit)
            limit = sz;
        cp = (unsigned char *)(__stringVal(coll) + pos - 1);

        for (;;) {
            if (pos > limit) break;
            ch = *cp;

            if ((ch < '0') || (ch > '9')) break;
            if (val > ((_MAX_INT - 9) / 10)) {
                /* adding one more digit could leave the SmallInteger range;
                   stop BEFORE consuming it and let the Smalltalk code go on */
                overflow = 1;
                break;
            }
            val = val * 10 + (ch - '0');
            pos++;
            cp++;
        }

        /* remember how far we got - also in the overflow case, so that
           the fallback does not read the same digits a second time */
        __INST(position) = __MKSMALLINT(pos);
        if (! overflow) {
            RETURN ( __MKSMALLINT(val) );
        }
    }

    /* hand over what has been accumulated so far (0 if nothing was read) */
    value = __MKSMALLINT(val);
%}
.
    "fall-back for non-string streams - we have to continue where
     above primitive left off, in case of a large integer ...
     (instead of doing a super nextDecimalInteger)"

    nextOne := self peek.
    [nextOne notNil and:[nextOne isDigitRadix:10]] whileTrue:[
        value := (value * 10) + nextOne digitValue.
        nextOne := self nextPeek
    ].
    ^ value
!

nextPeek
    "step over the current element and answer the one following it,
     without consuming that one; same effect as (self next; peek).
     If there is no such element, the result of #pastEnd is answered.
     - streams on Strings are served by the inline C code below"

%{  /* NOCONTEXT */
    OBJ theCollection, posObj, limitObj;

    theCollection = __INST(collection);
    posObj = __INST(position);
    limitObj = __INST(readLimit);

    if (__isString(theCollection) && __bothSmallInteger(posObj, limitObj)) {
        REGISTER int current;
        unsigned peeked;

        current = __intVal(posObj);

        /* handled here only if a further element exists after the current one */
        if ((current >= 1)
         && (current < __intVal(limitObj))
         && (current < __stringSize(theCollection))) {
            /* 0-based index [current] is the element at 1-based position current+1 */
            peeked = __stringVal(theCollection)[current];
            __INST(position) = __MKSMALLINT(current + 1);
            RETURN ( __MKCHARACTER(peeked) );
        }
    }
%}
.
    "general case: check for the end both before and after advancing"
    (position > readLimit) ifTrue:[^ self pastEnd].
    position := position + 1.
    (position > readLimit) ifTrue:[^ self pastEnd].
    ^ collection at:position
!

nextSymbol
    "read the next selector-symbol (i.e. up to the first character which
     is neither a letter, a digit nor a colon). Whitespace is not skipped.
     Return a string containing those characters, or nil if there is
     no such character at the current position.
     - tuned for speed on String-Streams for faster scanning.
       The inline C code only recognizes a-z, A-Z, 0-9 and ':' and
       returns at most 255 characters per call; longer symbols are
       continued by the next call.
       Non-string streams and invalid positions are passed on to the
       inherited implementation."
%{
    int pos, limit, sz;
    int len;
    char buffer[256];
    REGISTER unsigned char *cp;
    REGISTER unsigned ch;
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    /* a position below 1 would address memory in front of the characters */
    if (__isString(coll) && __bothSmallInteger(p, l) && (__intVal(p) > 0)) {

        pos = __intVal(p);
        limit = __intVal(l);
        sz = __qSize(coll) - OHDR_SIZE;
        if (sz < limit)
            limit = sz;
        cp = (unsigned char *)(__stringVal(coll) + pos - 1);

        len = 0;
        for (;;) {
            if (pos > limit) break;
            ch = *cp;

            if (! (((ch >= 'a') && (ch <= 'z')) ||
                   ((ch >= 'A') && (ch <= 'Z')) ||
                   ((ch >= '0') && (ch <= '9')) ||
                   (ch == ':')))
                break;
            buffer[len++] = ch;
            /* consume the character BEFORE the buffer-full check;
               otherwise the character just stored would be read again */
            pos++;
            cp++;
            if (len >= (sizeof(buffer)-1)) {
                /* emergency - buffer is full; leave room for the 0-byte */
                break;
            }
        }

        __INST(position) = __MKSMALLINT(pos);
        buffer[len] = '\0';
        RETURN ( (len != 0) ? __MKSTRING_L(buffer, len COMMA_CON) : nil );
    }
%}
.
    ^ super nextSymbol
!

peek
    "answer the element at the current read position; the position
     is left unchanged.
     If the position is beyond the readLimit, the result of #pastEnd is answered.
     - collections whose class is exactly String, ByteArray or Array
       are served directly by the inline C code below;
       everything else is handled by the Smalltalk code after it"

%{  /* NOCONTEXT */

    OBJ theCollection, collClass, posObj, limitObj;
    REGISTER int idx;
    unsigned byteVal;

    theCollection = __INST(collection);
    posObj = __INST(position);
    limitObj = __INST(readLimit);

    if (__isNonNilObject(theCollection) && __bothSmallInteger(posObj, limitObj)) {
        idx = __intVal(posObj);

        /* only positions within 1 .. readLimit are handled here */
        if ((idx >= 1) && (idx <= __intVal(limitObj))) {
            collClass = __qClass(theCollection);

            if (collClass == @global(String)) {
                if (idx <= __stringSize(theCollection)) {
                    byteVal = __stringVal(theCollection)[idx - 1];
                    RETURN ( __MKCHARACTER(byteVal) );
                }
            } else if (collClass == @global(ByteArray)) {
                if (idx <= __byteArraySize(theCollection)) {
                    byteVal = __ByteArrayInstPtr(theCollection)->ba_element[idx - 1];
                    RETURN ( __MKSMALLINT(byteVal) );
                }
            } else if (collClass == @global(Array)) {
                if (idx <= __arraySize(theCollection)) {
                    RETURN ( __ArrayInstPtr(theCollection)->a_element[idx - 1] );
                }
            }
        }
    }
%}
.
    "general case: any other collection, or an index the primitive did not accept"
    (position > readLimit) ifTrue:[^ self pastEnd].
    ^ collection at:position
!

skipSeparators
    "advance over whitespace, so that the following #next delivers the
     first non-whitespace element.
     Answer that element (without consuming it), or nil if the end
     of the stream was reached while skipping.
     - streams on Strings are served by the inline C code below, which
       treats space, tab, return, newline and 16r0B as whitespace;
       all other streams use the inherited implementation"

%{  /* NOCONTEXT */
    OBJ theCollection, posObj, limitObj;

    theCollection = __INST(collection);
    posObj = __INST(position);
    limitObj = __INST(readLimit);

    if (__isString(theCollection) && __bothSmallInteger(posObj, limitObj)) {
        REGISTER unsigned char *scan;
        REGISTER unsigned ch;
        REGISTER int idx;
        int lastIdx;
        int nBytes;
        int isSeparator;

        idx = __intVal(posObj);
        if (idx <= 0) {
            RETURN ( nil );
        }

        /* never scan beyond the string's storage */
        lastIdx = __intVal(limitObj);
        nBytes = __qSize(theCollection) - OHDR_SIZE;
        if (lastIdx > nBytes) {
            lastIdx = nBytes;
        }

        for (scan = (unsigned char *)(__stringVal(theCollection) + idx - 1);
             idx <= lastIdx;
             idx++, scan++) {
            ch = *scan;
            isSeparator = (ch == ' ')
                          || (ch == '\t')
                          || (ch == '\r')
                          || (ch == '\n')
                          || (ch == 0x0B);
            if (! isSeparator) {
                /* leave the stream positioned ON the element found */
                __INST(position) = __MKSMALLINT(idx);
                RETURN ( __MKCHARACTER(ch) );
            }
        }

        /* nothing but whitespace up to the end */
        __INST(position) = __MKSMALLINT(idx);
        RETURN ( nil );
    }
%}.
    ^ super skipSeparators

    "
     |s|

     s := '     hello     world    ' readStream.
     s skipSeparators.
     s next.
    "
!

skipSeparatorsExceptCR
    "advance over whitespace, but stop at a newline;
     the following #next delivers the element at which skipping stopped.
     Answer that element (without consuming it), or nil if the end
     of the stream was reached while skipping.
     - streams on Strings are served by the inline C code below, which
       skips space, tab, return and 16r0B;
       all other streams use the inherited implementation"

%{  /* NOCONTEXT */

    OBJ theCollection, posObj, limitObj;

    theCollection = __INST(collection);
    posObj = __INST(position);
    limitObj = __INST(readLimit);

    if (__isString(theCollection) && __bothSmallInteger(posObj, limitObj)) {
        REGISTER unsigned char *scan;
        REGISTER unsigned ch;
        REGISTER int idx;
        int lastIdx;
        int isSkipped;

        idx = __intVal(posObj);
        if (idx <= 0) {
            RETURN ( nil );
        }

        /* never scan beyond the string's storage */
        lastIdx = __intVal(limitObj);
        if (lastIdx > (__qSize(theCollection) - OHDR_SIZE)) {
            lastIdx = __qSize(theCollection) - OHDR_SIZE;
        }

        for (scan = (unsigned char *)(__stringVal(theCollection) + idx - 1);
             idx <= lastIdx;
             idx++, scan++) {
            ch = *scan;
            isSkipped = (ch == ' ')
                        || (ch == '\t')
                        || (ch == '\r')
                        || (ch == 0x0B);
            if (! isSkipped) {
                /* leave the stream positioned ON the element found */
                __INST(position) = __MKSMALLINT(idx);
                RETURN ( __MKCHARACTER(ch) );
            }
        }

        /* only skippable characters up to the end */
        __INST(position) = __MKSMALLINT(idx);
        RETURN ( nil );
    }
%}
.
    ^ super skipSeparatorsExceptCR
!

skipThrough:anObject
    "skip all objects up-to and including anObject, return anObject on success,
     nil if end-of-stream is reached before.
     On success, the next read operation will return the element after anObject.
     If anObject is not found, the stream is left positioned at its end.
     - reimplemented for speed on String-Streams for faster scanning;
       the inline C code handles the search for a character in a string,
       all other cases use the inherited implementation"

%{  /* NOCONTEXT */
    OBJ coll, p, l;

    coll = __INST(collection);
    p = __INST(position);
    l = __INST(readLimit);

    if (__isString(coll)
     && __isCharacter(anObject)
     && __bothSmallInteger(p, l)) {
        REGISTER unsigned char *chars;
        REGISTER int pos, limit;
        unsigned ch;
        int sz;

        pos = __intVal(p);
        if (pos <= 0) {
            RETURN ( nil );
        }

        /* never scan beyond the string's characters */
        limit = __intVal(l);
        sz = __stringSize(coll);
        if (limit > sz) limit = sz;

        chars = (unsigned char *)(__stringVal(coll) + pos - 1);
        ch = __intVal(_characterVal(anObject)) & 0xFF;

        /* positions are 1-based and limit is the last readable one,
           so the element AT limit must be looked at as well */
        while (pos <= limit) {
            if (*chars == ch) {
                /* found - continue reading behind it */
                __INST(position) = __MKSMALLINT(pos + 1);
                RETURN ( anObject );
            }
            chars++;
            pos++;
        }

        /* not found - everything up to the end has been skipped */
        __INST(position) = __MKSMALLINT(pos);
        RETURN ( nil );
    }
%}
.
    ^ super skipThrough:anObject
! !

!ReadStream methodsFor:'writing'!

nextPut:anElement
    "catch write access to readstreams - report an error.
     anElement is not looked at; the receiver only sends
     #shouldNotImplement to itself."

    self shouldNotImplement
! !

!ReadStream  class methodsFor:'documentation'!

version
    "return the revision header string of this class's source file"

    ^ '$Header: /cvs/stx/stx/libbasic/Attic/ReadStr.st,v 1.31 1996-09-13 16:49:38 cg Exp $'
! !