String.st
author Claus Gittinger <cg@exept.de>
Thu, 15 Jul 1999 21:45:54 +0200
changeset 4392 26fb48f04e1b
parent 4390 0cf1f8bd7319
child 4631 f07d1701b537
permissions -rw-r--r--
comment

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

CharacterArray variableByteSubclass:#String
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text'
!

!String primitiveDefinitions!
%{

#include <stdio.h>
#define _STDIO_H_INCLUDED_

#include <ctype.h>

#ifdef LINUX
# define __STRINGDEFS__
# include <linuxIntern.h>
#else
  extern char *strchr();
#endif

/*
 * old st/x creates strings with spaces in it;
 * new st/x will fill it with zeros (for st-80 compatibility)
 * the define below sets old behavior.
 */
#define INITIALIZE_WITH_SPACE
%}
! !

!String class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Strings are ByteArrays storing Characters.

    Strings are kind of kludgy: to allow for easy handling by c-functions,
    there is always one 0-byte added at the end, which is not counted
    in the strings size.
    Also, the at:put: method does not allow for storing 0-bytes.
    (to do this, the basicAt:put: and basicNew: methods are redefined)

    You cannot add any instvars to String, since the the run time system & compiler
    creates literal strings and knows that strings have no named instvars.
    If you really need strings with instVars, you have to create a subclass
    of String (the access functions defined here can handle this).
    A little warning though: not all smalltalk systems allow subclassing String,
    so your program may become unportable if you do so.

    [author:]
	Claus Gittinger

    [see also:]
	Text StringCollection TwoByteString JISEncodedString
	Symbol
"
! !

!String class methodsFor:'instance creation'!

basicNew:anInteger
    "return a new empty string with anInteger characters"

%{  /* NOCONTEXT */

    OBJ newString;
    REGISTER int len;
    REGISTER unsigned char *cp;
    REGISTER OBJ *op;
    int nInstVars, instsize;

    if (__isSmallInteger(anInteger)) {
	len = __intVal(anInteger);
	if (len >= 0) {
	    instsize = OHDR_SIZE + len + 1;
	    if (self == String) {
		if (__CanDoQuickNew(instsize)) {        /* OBJECT ALLOCATION */
		    /*
		     * the most common case
		     */
		    __qCheckedNew(newString, instsize);
		    __InstPtr(newString)->o_class = self; /* no need for PROTECT - there was no GC */
		    __qSTORE(newString, self);

		    cp = __stringVal(newString);

#if defined(memset4) && !defined(NON_ASCII)
		    {
			/* 
			 * no sizeof(int) here please -
			 * - memset4 (if defined) fills 4-bytes on ALL machines 
			 */
			int l4 = len >> 2;

			if (len & 3) l4++;
			memset4(cp, 0x20202020, l4);
			cp[len] = '\0';
		    }
#else
# ifdef FAST_MEMSET
		    memset(cp, ' ', len);
		    cp[len] = '\0';
# else
		    while (len >= 8) {
#  ifndef NON_ASCII       /* i.e. EBCDIC  */
#   ifdef INT64
			((INT64 *)cp)[0] = 0x2020202020202020L;
#   else
			((int *)cp)[0] = 0x20202020;
			((int *)cp)[1] = 0x20202020;
#   endif
#  else
			cp[0] = cp[1] = cp[2] = cp[3] = ' ';
			cp[4] = cp[5] = cp[6] = cp[7] = ' ';
#  endif
			cp += 8; 
			len -= 8;
		    }
		    while (len--)
			*cp++ = ' ';
		    *cp = '\0';
# endif /* not FAST_MEMSET */
#endif /* not memset4 */

		    RETURN (newString);
		}
		nInstVars = 0;
	    } else {
		nInstVars = __intVal(__ClassInstPtr(self)->c_ninstvars);
		instsize += __OBJS2BYTES__(nInstVars);
	    }

	    __PROTECT_CONTEXT__;
	    __qNew(newString, instsize);        /* OBJECT ALLOCATION */
	    __UNPROTECT_CONTEXT__;

	    if (newString == nil) goto fail;

	    __InstPtr(newString)->o_class = self;
	    __qSTORE(newString, self);

	    if (nInstVars) {
		/*
		 * nil-out instvars
		 */
#if defined(memset4)
		memset4(__InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
		/*
		 * knowing that nil is 0
		 */
		memset(__InstPtr(newString)->i_instvars, 0, __OBJS2BYTES__(nInstVars));
# else
		op = __InstPtr(newString)->i_instvars;
		do {
		    *op++ = nil;
		} while (--nInstVars);
# endif
#endif
		cp = __stringVal(newString) + __OBJS2BYTES__(nInstVars);
	    } else {
		cp = __stringVal(newString);
	    }

	    /*
	     * fill with spaces
	     */
#ifdef FAST_MEMSET
	    memset(cp, ' ', len);
	    *(cp + len) = '\0';
#else
	    while (len >= 8) {
# ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
#  ifdef INT64
		((INT64 *)cp)[0] = 0x2020202020202020L;
#  else
		((int *)cp)[0] = 0x20202020;
		((int *)cp)[1] = 0x20202020;
#  endif
# else
		cp[0] = cp[1] = cp[2] = cp[3] = ' ';
		cp[4] = cp[5] = cp[6] = cp[7] = ' ';
# endif
		cp += 8;
		len -= 8;
	    }
	    while (len--)
		*cp++ = ' ';
	    *cp = '\0';
#endif
	    RETURN (newString);
	}
    }
fail: ;;
%}.
    "
     invalid argument, or out-of-memory:
     use error handling in superclass
    "
    ^ (super basicNew:anInteger) atAllPut:(Character space)
!

new:n
    ^ self basicNew:n
!

readFrom:aStream onError:exceptionBlock
    "read & return the next String from the (character-)stream aStream;
     skipping all whitespace first; return the value of exceptionBlock,
     if no string can be read. The sequence of characters as read from the 
     stream must be one as stored via storeOn: or storeString."

    |collected char|

    "
     this method is not to be inherited
     (i.e. not ok for subclasses; Symbol, for example)
    "
    self ~~ String ifTrue:[
	^ super readFrom:aStream onError:exceptionBlock
    ].

    "skip whiteSpace"
    aStream skipSeparators.
    (aStream next == $') ifTrue:[
	collected := WriteStream on:(String new).
	[true] whileTrue:[
	    aStream atEnd ifTrue:[
		"/ mhmh - reached the end without a closing quote
		"/ looks like an error to me ...
		^ exceptionBlock value
	    ].
	    char := aStream next.
	    char == $' ifTrue:[
		"/ look for another quote
		aStream peekOrNil == $' ifFalse:[
		    ^ collected contents
		].
		aStream next.
	    ].
	    collected nextPut:char
	]
    ].
    ^ exceptionBlock value

    "
     String readFrom:('''hello world''' readStream) 
     String readFrom:('''hello '''' world''' readStream) 
     String readFrom:('1 ''hello'' ' readStream)
     String readFrom:('1 ''hello'' ' readStream) onError:['foobar']  
    "

    "Modified: / 14.4.1998 / 18:46:26 / cg"
!

unititializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end"

    ^ self basicNew:anInteger
! !

!String class methodsFor:'binary storage'!

binaryDefinitionFrom:stream manager:manager
    "read a binary representation from stream. This is only invoked
     for long strings. Short strings are stored with 1byte length."

    |s len|

    "take care of subclasses ..."
    ((self == String) or:[self == Symbol]) ifTrue:[
	len := stream nextNumber:4.
	s := String basicNew:len.
	stream nextBytes:len into:s startingAt:1.
	^ s
    ].
    ^ super binaryDefinitionFrom:stream manager:manager

    "Modified: / 2.11.1997 / 16:18:37 / cg"
! !

!String class methodsFor:'queries'!

defaultPlatformClass
    "dummy for ST-80 compatibility"

    ^ self

    "Created: 6.6.1997 / 18:25:56 / cg"
!

isBuiltInClass
    "return true if this class is known by the run-time-system.
     Here, true is returned for myself, false for subclasses."

    ^ self == String

    "Modified: 23.4.1996 / 16:00:38 / cg"
! !

!String methodsFor:'accessing'!

at:index
    "return the character at position index, an Integer.
     Reimplemented here to avoid the additional at:->basicAt: send
     (which we can do here, since at: is obviously not redefined in a subclass).
     This method is the same as at:."

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ slf, cls;

    if (__isSmallInteger(index)) {
        slf = self;
        cls = __qClass(slf);
        indx = __intVal(index) - 1;
        if (cls != String) {
            if (indx < 0) goto badIndex;
            indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
        }
        if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
            RETURN ( __MKCHARACTER(__stringVal(slf)[indx] & 0xFF) );
        }
    }
badIndex: ;
%}.
    index isInteger ifFalse:[
        ^ self indexNotInteger:index
    ].
    ^ self subscriptBoundsError:index
!

at:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer.
     Return aCharacter (sigh).
     Reimplemented here to avoid the additional at:put:->basicAt:put: send
     (but only for Strings, since subclasses may redefine basicAt:put:).
     This method is the same as basicAt:put:."

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    REGISTER OBJ slf;

    slf = self;

    if (__isString(slf)) {
	if (__isCharacter(aCharacter)) {
	    value = __intVal(_characterVal(aCharacter));
	    if (((unsigned)value <= 0xFF)
	     && __isSmallInteger(index)) {
		indx = __intVal(index) - 1;
		if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
		    __stringVal(slf)[indx] = value;
		    RETURN ( aCharacter );
		}
	    }
	}
    }
%}.
    (self isMemberOf:String) ifFalse:[
	^ super at:index put:aCharacter
    ].

    (aCharacter isMemberOf:Character) ifFalse:[
	"
	 tried to store something which is not a character
	"
	^ self elementNotCharacter
    ].
    (aCharacter asciiValue between:1 and:255) ifFalse:[
	"
	 tried to store a multibyte character
	"
	^ self elementBoundsError
    ].
    "
     invalid index
    "
    index isInteger ifFalse:[
	^ self indexNotInteger:index
    ].
    ^ self subscriptBoundsError:index

    "Modified: 19.4.1996 / 11:40:27 / cg"
!

basicAt:index
    "return the character at position index, an Integer
     - reimplemented here since we return characters"

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ slf, cls;

    if (__isSmallInteger(index)) {
        slf = self;
        cls = __qClass(slf);
        indx = __intVal(index) - 1;
        if (cls != String) {
            if (indx < 0) goto badIndex;
            indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
        }
        if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
            RETURN ( __MKCHARACTER(__stringVal(slf)[indx] & 0xFF) );
        }
    }
badIndex: ;
%}.
    index isInteger ifFalse:[
        ^ self indexNotInteger:index
    ].
    ^ self subscriptBoundsError:index
!

basicAt:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer.
     Returns aCharacter (sigh).
     - reimplemented here since we store characters"

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    REGISTER OBJ slf;
    REGISTER OBJ cls;

    slf = self;

    if (__isCharacter(aCharacter)) {
        value = __intVal(_characterVal(aCharacter));
        if (((unsigned)value <= 0xFF)
         && __isSmallInteger(index)) {
            cls = __qClass(slf);
            indx = __intVal(index) - 1;
            if (cls != String) {
                if (indx < 0) goto badIndex;
                indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
            }
            if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
                __stringVal(slf)[indx] = value;
                RETURN ( aCharacter );
            }
        }
    }
badIndex: ;
%}.
    (aCharacter isMemberOf:Character) ifFalse:[
        "
         tried to store something which is not a character
        "
        ^ self elementNotCharacter
    ].
    (aCharacter asciiValue between:1 and:255) ifFalse:[
        "
         tried to store a multibyte character
        "
        ^ self elementBoundsError
    ].
    "
     invalid index
    "
    index isInteger ifFalse:[
        ^ self indexNotInteger:index
    ].
    ^ self subscriptBoundsError:index

    "Modified: 19.4.1996 / 11:15:48 / cg"
!

basicSize
    "return the number of characters in myself.
     Redefined here to exclude the 0-byte at the end."

%{  /* NOCONTEXT */
    REGISTER OBJ slf, cls;

    slf = self;

    cls = __qClass(slf);
    if (cls == String) {
	RETURN ( __MKSMALLINT(__stringSize(slf)) );
    }
    RETURN ( __MKSMALLINT(__stringSize(slf)
			  - __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars))));
%}
!

size
    "return the number of characters in myself.
     Reimplemented here to avoid the additional size->basicSize send
     (which we can do here, since size is obviously not redefined in a subclass).
     This method is the same as basicSize."

%{  /* NOCONTEXT */
    REGISTER OBJ cls, slf;

    slf = self;
    cls = __qClass(slf);
    if (cls == String) {
	RETURN ( __MKSMALLINT(__stringSize(slf)) );
    }
    RETURN ( __MKSMALLINT(__stringSize(slf)
			 - __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars))));
%}
! !

!String methodsFor:'binary storage'!

storeBinaryDefinitionOn:stream manager:manager
    "append a binary representation of the receiver onto stream.
     Redefined since short Strings can be stored with a special type code 
     in a more compact way.
     This is an internal interface for the binary storage mechanism."

    |myClass myBasicSize|

    "/ not for subclasses with named instVars.
    (myClass := self class) instSize ~~ 0 ifTrue:[
	^ super storeBinaryDefinitionOn:stream manager:manager
    ].

    myBasicSize := self basicSize.

    "/ can use a more compact representation;
    "/ but not for subclasses ...

    ((myClass == String) 
    and:[myBasicSize <= 255]) ifTrue:[
	"/ special encoding: <codeForString> <len> <bytes> ...
	stream nextPut:(manager codeForString); nextPut:myBasicSize.
    ] ifFalse:[
	manager putIdOfClass:myClass on:stream.
	stream nextNumber:4 put:myBasicSize.
    ].
    stream nextPutBytes:myBasicSize from:self startingAt:1.

    "Modified: / 2.11.1997 / 15:28:56 / cg"
! !

!String methodsFor:'character searching'!

identityIndexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
#ifdef FAST_STRCHR
    char *strchr();
#else
    REGISTER unsigned byteValue;
    REGISTER int index;
    char c;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, __intVal(_characterVal(aCharacter)));
	if (cp) {
	    RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
	}
#else
	byteValue = __intVal(_characterVal(aCharacter));
	index = 1;
	while (c = *cp++) {
	    if (c == byteValue) { RETURN ( __MKSMALLINT(index) ); }
	    index++;
	}
#endif
	RETURN ( __MKSMALLINT(0));
    }
%}.
    "/ with identity compares, only characters can be in myself
    ^ 0

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
    "
!

includes:aCharacter
    "return true if the argument, aCharacter is included in the receiver
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	byteValue = __intVal(_characterVal(aCharacter));
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, __intVal(_characterVal(aCharacter)));
	if (cp) {
	    RETURN ( true );
	}
#else
	while (*cp) {
	    if (*cp == byteValue) {
		RETURN ( true );
	    }
	    cp++;
	}
#endif
	RETURN (false);
    }
%}.
    ^ super includes:aCharacter

    "
     'hello world' includes:$a
     'hello world' includes:$o  
     'hello world' includes:$x  
     'hello world' includes:1    
     'hello world' asTwoByteString includes:$o  
    "
!

includesAny:aCollection
    "return true, if the receiver includes any of the characters in the
     argument, aCollection.
     - redefined for speed if the argument is a String."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char *matchP;
    OBJ cls;

    if (__isString(aCollection)) {
	matchP = __stringVal(aCollection);
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	while (*cp) {
	    if (strchr(matchP, *cp)) {
		RETURN ( true );
	    }
	    cp++;
	}
	RETURN ( false );
    }
%}
.
    ^ super includesAny:aCollection

    "
     'hello world' includesAny:'abcd'                      
     'hello world' includesAny:'xyz'                      
     'hello world' includesAny:(Array with:$a with:$b with:$d)   
     'hello world' includesAny:(Array with:$x with:$y)     
     'hello world' includesAny:(Array with:1 with:2)    
    "
!

indexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned byteValue;
    REGISTER int index;
#ifdef FAST_STRCHR
    char *strchr();
#else
    unsigned char c;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	byteValue = __intVal(_characterVal(aCharacter));
	if (byteValue == 0) {
	    index = strlen(cp);
	    if (index >= __stringSize(self)) {
		index = 0;
	    } else {
		index++;
	    }
	    RETURN ( __MKSMALLINT(index) );
	}

#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, byteValue);
	if (cp) {
	    RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
	}
#else
	index = 1;
	while (c = *cp++) {
	    if (c == byteValue) { RETURN ( __MKSMALLINT(index) ); }
	    index++;
	}
#endif
	RETURN ( __MKSMALLINT(0));
    }
%}.
    ^ super indexOf:aCharacter

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
    "
!

indexOf:aCharacter startingAt:start
    "return the index of the first occurrence of the argument, aCharacter
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int index;
    REGISTER unsigned byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#else
    unsigned char c;
#endif
    int len;
    OBJ cls;

    if (__isSmallInteger(start)) {
	if (__isCharacter(aCharacter)) {
	    byteValue = __intVal(_characterVal(aCharacter));
	    index = __intVal(start);
	    if (index <= 0)
		index = 1;
	    if ((cls = __qClass(self)) != String)
		index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	    len = __stringSize(self);
	    if (index <= len) {
		cp = __stringVal(self) + index - 1;

		if (byteValue == 0) {
		    index += strlen(cp);
		    if (index > len) {
			index = 0;
		    } 
		    RETURN ( __MKSMALLINT(index) );
		}

#ifdef FAST_STRCHR
		cp = (unsigned char *) strchr(cp, byteValue);
		if (cp) {
		    RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
		}
#else
		while (c = *cp++) {

		    if (c == byteValue) {
			RETURN ( __MKSMALLINT(index) );
		    }
		    index++;
		}
#endif
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
%}.
    ^ super indexOf:aCharacter startingAt:start

    "
     'hello world' indexOf:$l startingAt:1 
     'hello world' indexOf:$l startingAt:5  
    "
!

indexOfAny:aCollectionOfCharacters startingAt:start
    "return the index of the first occurrence of any character in aCollectionOfCharacters,
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed if aCollectionOfCharacters is a string."

%{  /* NOCONTEXT */

    REGISTER unsigned char *ccp;
    REGISTER unsigned char *cp;
    REGISTER int index;
    unsigned char *matchP;
    unsigned char c;
    int len;
    OBJ cls;

    if (__isSmallInteger(start)
     && __isString(aCollectionOfCharacters)) {
	matchP = __stringVal(aCollectionOfCharacters);
	index = __intVal(start);
	if (index <= 0)
	    index = 1;
	if ((cls = __qClass(self)) != String)
	    index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	len = __stringSize(self);
	if (index <= len) {
	    cp = __stringVal(self) + index - 1;
	    while (c = *cp++) {
		for (ccp = matchP; *ccp ; ccp++) {
		    if (*ccp == c) {
			RETURN ( __MKSMALLINT(index) );
		    }
		}
		index++;
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
%}.
    "/
    "/ fallback: 1st argument not a string or error
    "/
    ^ super indexOfAny:aCollectionOfCharacters startingAt:start

    "
     'hello world' indexOfAny:'eoa' startingAt:1   
     'hello world' indexOfAny:'eoa' startingAt:6 
     'hello world' indexOfAny:'AOE' startingAt:1 
    "
!

indexOfControlCharacterStartingAt:start
    "return the index of the next control character;
     that is a character with asciiValue < 32.
     Return 0 if none is found."

%{  /* NOCONTEXT */

#ifndef NON_ASCII       /* i.e. not EBCDIC ;-) */
    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    REGISTER unsigned char *cpEnd;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
	index = 1;
    }
    if ((cls = __qClass(self)) != String)
	index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    cpEnd = __stringVal(self) + len;
    cp = __stringVal(self) + index - 1;
    if (cp < cpEnd) {
	while (cp < cpEnd) {
	    if (*cp++ < ' ') {
		RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
#endif
%}.
    ^ super indexOfControlCharacterStartingAt:start

    "
     'hello world'             indexOfControlCharacterStartingAt:1 
     'hello world\foo' withCRs indexOfControlCharacterStartingAt:1 
     '1\' withCRs indexOfControlCharacterStartingAt:1 
     '1\' withCRs indexOfControlCharacterStartingAt:2 
    "
!

indexOfNonSeparatorStartingAt:start
    "return the index of the next non-whiteSpace character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
	index = 1;
    }
    if ((cls = __qClass(self)) != String)
	index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    if (index > len) {
	RETURN ( __MKSMALLINT(0) );
    }
    cp = __stringVal(self) + index - 1;
    while (c = *cp++) {
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	if (c > ' ')
#endif
	if ((c != ' ') && (c != '\t') && (c != '\n')
	 && (c != '\r') && (c != '\f')) {
	    RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
	}
    }
%}.
    ^ 0

    "
     'hello world' indexOfNonWhiteSpaceStartingAt:3 
     'hello world' indexOfNonWhiteSpaceStartingAt:7 
    "
!

indexOfSeparatorStartingAt:start
    "return the index of the next separator character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
	index = 1;
    }
    if ((cls = __qClass(self)) != String)
	index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    if (index > len) {
	RETURN ( __MKSMALLINT(0) );
    }
    cp = __stringVal(self) + index - 1;
    while (c = *cp++) {
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	if (c <= ' ')
#endif
	if ((c == ' ') || (c == '\t') || (c == '\n')
	 || (c == '\r') || (c == '\f')) {
	    RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
	}
    }
%}
.
    ^ 0

    "
     'hello world' indexOfSeparatorStartingAt:3 
     'hello world' indexOfSeparatorStartingAt:7 
    "
!

occurrencesOf:aCharacter
    "count the occurrences of the argument, aCharacter in myself
      - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned byteValue;
    REGISTER int count;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	count = 0;
	byteValue = __intVal(_characterVal(aCharacter));
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	while (*cp) {
	    if (*cp++ == byteValue) count++;
	}
	RETURN ( __MKSMALLINT(count) );
    }
%}.
    ^ super occurrencesOf:aCharacter

    "
     'hello world' occurrencesOf:$a
     'hello world' occurrencesOf:$w
     'hello world' occurrencesOf:$l 
     'hello world' occurrencesOf:$x  
     'hello world' occurrencesOf:1 
    "
! !

!String methodsFor:'comparing'!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Use sameAs: to compare with case ignored."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;
    INT addrDelta;

    if (s == self) {
	RETURN ( true );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( false );
    }

    cls = __qClass(s);
    myCls = __qClass(self);

    if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	cp2 = __stringVal(s);
	l2 = __stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	cp1 = __stringVal(self);
	l1 = __stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (myCls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	if (l1 != l2) {
	    RETURN ( false );
	}
#ifdef FAST_STRNCMP
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? true : false );
#else
	addrDelta = cp2 - cp1;
# ifdef UNROLL_LOOPS
	while (l1 >= (sizeof(unsigned INT)*4)) {
	    if (((unsigned INT *)cp1)[0] != ((unsigned INT *)(cp1+addrDelta))[0]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[1] != ((unsigned INT *)(cp1+addrDelta))[1]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[2] != ((unsigned INT *)(cp1+addrDelta))[2]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[3] != ((unsigned INT *)(cp1+addrDelta))[3]) {
		RETURN (false);
	    }
	    l1 -= (sizeof(unsigned INT) * 4);
	    cp1 += (sizeof(unsigned INT) * 4);
	}
# endif /* UNROLL_LOOPS */
	while (l1 >= sizeof(unsigned INT)) {
	    if (*((unsigned INT *)cp1) != *((unsigned INT *)(cp1+addrDelta))) {
		RETURN (false);
	    }
	    l1 -= sizeof(unsigned INT);
	    cp1 += sizeof(unsigned INT);
	}
	if (l1 >= sizeof(unsigned short)) {
	    if (*((unsigned short *)cp1) != *((unsigned short *)(cp1+addrDelta))) {
		RETURN (false);
	    }
	    l1 -= sizeof(unsigned short);
	    cp1 += sizeof(unsigned short);
	}
	while (l1) {
	    if (*cp1 != *(cp1+addrDelta)) {
		RETURN (false);
	    }
	    l1--;
	    cp1++;
	}

	RETURN (true);
#endif
    }
%}.
    ^ super = aString

    "
     'foo' = 'Foo' 
     'foo' sameAs: 'Foo' 
    "
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     No national variants are honored; use after: for this.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true. 
     This may change."

%{  /* NOCONTEXT */

    int len1, len2, cmp;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;

    if (__isNonNilObject(s)) {
	cls = __qClass(s);
	myCls = __qClass(self);

	if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	    cp2 = __stringVal(s);
	    len2 = __stringSize(s);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (cls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

		cp2 += n;
		len2 -= n;
	    }

	    cp1 = __stringVal(self);
	    len1 = __stringSize(self);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (myCls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

		cp1 += n;
		len1 -= n;
	    }

	    if (len1 <= len2)
		cmp = strncmp(cp1, cp2, len1);
	    else
		cmp = strncmp(cp1, cp2, len2);

	    if (cmp > 0) {
		RETURN ( true );
	    }
	    if ((cmp == 0) && (len1 > len2)) {
		RETURN ( true );
	    }
	    RETURN ( false );
	}
    }
%}.
    ^ super > aString
!

after:aString
    "Compare the receiver with the argument and return true if the
     receiver should come after the argument in a sorted list. 
     Otherwise return false.
     The comparison is language specific, depending on the value of
     LC_COLLATE, which is initialized from the environment.

     STUPID:
	#after has a completely different meaning in SeqColl ..."

    ^ (self compareCollatingWith:aString) > 0

    "
     'max' > 'mäx'       
     'man' > 'mäx'      
     'max' after:'mäx'
     'man' after:'mäx' 
    "

    "Modified: 10.5.1996 / 14:02:45 / cg"
!

compareCollatingWith:aString
    "Compare the receiver with the argument and return 1 if the receiver is
     greater, 0 if equal and -1 if less than the argument in a sorted list. 
     The comparison is language specific, depending on the value of
     LC_COLLATE, which is in the shell environment."

%{  /* NOCONTEXT */

    int cmp;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;

    if (__isNonNilObject(s)) {
	cls = __qClass(s);
	myCls = __qClass(self);

	if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	    cp1 = __stringVal(self);

	    /*
	     * care for instances of subclasses ...
	     */
	    if (myCls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

		cp1 += n;
	    }

	    cp2 = __stringVal(s);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (cls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

		cp2 += n;
	    }

#ifdef HAS_STRCOLL
	    cmp = strcoll(cp1, cp2);
#else
	    cmp = strcmp(cp1, cp2);
#endif

	    if (cmp > 0) {
		RETURN ( __MKSMALLINT(1) );
	    }
	    if (cmp < 0) {
		RETURN ( __MKSMALLINT(-1) );
	    }
	    RETURN ( __MKSMALLINT(0) );
	}
    }
%}.
    "
     currently, this operation is only defined for strings, symbols
     and subclasses.
    "
    ^ self primitiveFailed
!

~= aString
    "Compare the receiver with the argument and return true if the
     receiver is not equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Actually, there is no need to redefine that method here,
     the default (= not as inherited) works ok.
     However, this may be heavily used and the redefinition saves an
     extra message send."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls, myCls;
    INT addrDelta;

    if (s == self) {
	RETURN ( false );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( true );
    }

    cls = __qClass(s);
    myCls = __qClass(self);

    if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	cp1 = __stringVal(self);
	l1 = __stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (myCls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	cp2 = __stringVal(s);
	l2 = __stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	if (l1 != l2) {
	    RETURN ( true );
	}

#ifdef FAST_STRNCMP
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? false : true );
#else
	addrDelta = cp2 - cp1;
# ifdef UNROLL_LOOPS
	while (l1 >= (sizeof(unsigned INT)*4)) {
	    if (((unsigned INT *)cp1)[0] != ((unsigned INT *)(cp1+addrDelta))[0]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[1] != ((unsigned INT *)(cp1+addrDelta))[1]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[2] != ((unsigned INT *)(cp1+addrDelta))[2]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[3] != ((unsigned INT *)(cp1+addrDelta))[3]) {
		RETURN (true);
	    }
	    l1 -= (sizeof(unsigned INT) * 4);
	    cp1 += (sizeof(unsigned INT) * 4);
	}
# endif /* UNROLL_LOOPS */
	while (l1 >= sizeof(unsigned INT)) {
	    if (*((unsigned INT *)cp1) != *((unsigned INT *)(cp1+addrDelta))) {
		RETURN (true);
	    }
	    l1 -= sizeof(unsigned INT);
	    cp1 += sizeof(unsigned INT);
	}
	if (l1 >= sizeof(unsigned short)) {
	    if (*((unsigned short *)cp1) != *((unsigned short *)(cp1+addrDelta))) {
		RETURN (true);
	    }
	    l1 -= sizeof(unsigned short);
	    cp1 += sizeof(unsigned short);
	}
	while (l1) {
	    if (*cp1 != *(cp1+addrDelta)) {
		RETURN (true);
	    }
	    l1--;
	    cp1++;
	}
	RETURN (false);
#endif
    }
%}.
    ^ super ~= aString
! !

!String methodsFor:'converting'!

asBoldText
    "return self as a bold text"

    ^Text string: self emphasis: #bold
!

asString
    "I am a string"

    ^ self

    "Created: 14.8.1997 / 11:37:03 / stefan"
!

asSymbol
    "return a unique symbol with the name taken from my characters."
%{
    OBJ newSymbol;
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
	indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
	indx = 0;
    }
    newSymbol = __MKSYMBOL(__stringVal(self) + indx, (OBJ *)0);
    if (newSymbol) {
	RETURN ( newSymbol);
    }
%}.
    ^ ObjectMemory allocationFailureSignal raise.

    "
     'hello' asSymbol  
    "
!

asSymbolIfInterned
    "if a symbol with the receivers characters is already known, return it.
     Otherwise, return nil. This can be used to query for an existing
     symbol and is the same as
	self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil]
     but slightly faster, since the symbol lookup operation is only
     performed once."

%{  /* NOCONTEXT */
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
	indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
	indx = 0;
    }
    RETURN ( __SYMBOL_OR_NIL(__stringVal(self) + indx));
%}

    "
     'hello' asSymbolIfInterned
     'fooBarBaz' asSymbolIfInterned
    "
!

decodeAsLiteralArray
    "given a literalEncoding in the receiver,
     create & return the corresponding object.
     The inverse operation to #literalArrayEncoding."

    ^ self

    "Created: 25.2.1997 / 19:18:19 / cg"
!

literalArrayEncoding
    "encode myself as an array literal, from which a copy of the receiver
     can be reconstructed with #decodeAsLiteralArray."

    ^ self

    "Modified: 1.9.1995 / 02:25:45 / claus"
    "Modified: 22.4.1996 / 13:00:50 / cg"
!

withTabsExpanded
    "return a string with the characters of the receiver where all tabulator characters
     are expanded into spaces (assuming 8-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings.
     Rewritten for speed - because this is very heavily used when reading
     big files in the FileBrowser (and therefore speeds up fileReading considerably)."

%{  /* STACK:700 */
    unsigned char buffer[80*8 + 10];
    unsigned char *srcP, *dstP, *cp0;
    int idx, sz;
    int any = 0;
    OBJ newString;
    char c;

    if (__qClass(self) == String) {
	/*
	 * for small strings (< 80), do it without a prescan ...
	 * the buffer is large enough to even convert a
	 * receiver consisting fully of tabs.
	 */
	if (__stringSize(self) < 80) {
	    idx = 1;
	    for (srcP = __stringVal(self), dstP = buffer; (c = *srcP); srcP++) {
		if (c == '\t') {
		    any = 1;
		    while (idx % 8) {
			idx++;
			*dstP++ = ' ';
		    }
		    idx++;
		    *dstP++ = ' ';
		} else {
		    *dstP++ = c;
		    idx++;
		    if (c == '\n') {
			idx = 1;
		    }
		}
	    }
	    if (! any) RETURN(self);
	    *dstP = '\0';
	    RETURN (__MKSTRING_L(buffer, (dstP-buffer)));
	}
	/*
	 * for large strings, we compute the new size, allocate a new string
	 * and expand it.
	 *
	 * first, scan for size ...
	 */
	idx = 1;
	for (srcP = __stringVal(self), sz = 0; (c = *srcP); srcP++) {
	    if (c == '\t') {
		any = 1;
		while (idx % 8) {
		    idx++;
		    sz++;
		}
		idx++; sz ++;
	    } else {
		sz++; idx++;
		if (c == '\n') {
		    idx = 1;
		}
	    }
	}
	if (! any) RETURN(self);

	/*
	 * get the string
	 */
	sz = OHDR_SIZE + sz + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;

	    /*
	     * expand
	     */
	    idx = 1;
	    for (srcP = __stringVal(self), dstP = cp0 = __stringVal(newString); (c = *srcP); srcP++) {
		if (c == '\t') {
		    while (idx % 8) {
			idx++;
			*dstP++ = ' ';
		    }
		    idx++;
		    *dstP++ = ' ';
		} else {
		    *dstP++ = c; idx++;
		    if (c == '\n') {
			idx = 1;
		    }
		}
	    }
	    *dstP++ = '\0';
	    RETURN (newString);
	}
    }
%}.
    ^ super withTabsExpanded
! !

!String methodsFor:'copying'!

, aString
    "return the concatenation of myself and the argument, aString as
     a String.
     - reimplemented here for speed"

%{
    int l1, l2, sz;
    REGISTER OBJ s = aString;
    REGISTER OBJ _string = String;
    OBJ myClass, argClass, newString;

    if (__isNonNilObject(s)) {
	myClass = __qClass(self);
	argClass = __qClass(s);
	/*
	 * can do it here if both are Strings/Symbols:
	 */
	if (((myClass == _string) || (myClass == Symbol))
	 && ((argClass == _string) || (argClass == Symbol))) {
	    l1 = __stringSize(self);
	    l2 = __stringSize(s);

	    sz = OHDR_SIZE + l1 + l2 + 1;
	    __qNew(newString, sz);      /* OBJECT ALLOCATION */
	    if (newString != nil) {
		char *cp1, *cp2;
		REGISTER unsigned char *dstp;

		__InstPtr(newString)->o_class = String;
		dstp = __stringVal(newString);
		cp1 = (char *) __stringVal(self);
		cp2 = (char *) __stringVal(aString);

#ifdef bcopy4
		/* knowing that allocation is 4-byte aligned and
		 * size rounded up to next 4-byte, the first copy
		 * can be done word-wise.
		 * that speeds up size-10-string , size-10-string 
		 * by 10% on a P5/200.
		 */
		{
		    int nw = l1 >> 2;

		    if (l1 & 3) nw++;
		    bcopy4(cp1, dstp, nw);
		    dstp += l1;
		}
#else
# ifdef FAST_MEMCPY
		bcopy(cp1, dstp, l1);
		dstp += l1;
# else
#  ifdef FAST_STRCPY
		strncpy(dstp, cp1, l1);
		dstp += l1;
#  else
		while (l1 >= 4) {
		    *(int *)dstp = *(int *)cp1;
		    dstp += 4; cp1 += 4;
		    l1 -= 4;
		}
		while (l1--) *dstp++ = *cp1++;
#  endif
# endif
#endif

#ifdef bcopy4
		if (((INT)dstp & 3) == 0) {
		    int nw = l2 >> 2;

		    if (l2 & 3) nw++;
		    bcopy4(cp2, dstp, nw);
		    *(dstp + l2) = '\0';
		    RETURN ( newString );
		}
#endif
                    
#ifdef FAST_MEMCPY
		bcopy(cp2, dstp, l2+1);
		dstp[l2] = '\0';
#else
# ifdef FAST_STRCPY
		strncpy(dstp, cp2, l2);
		dstp[l2] = '\0';
# else
		while (l2--) *dstp++ = *cp2++;
		*dstp = '\0';
# endif
#endif
		RETURN ( newString );
	    }
	}
    }
%}.
    ^ super , aString
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected"

    |newString|

%{
    int len1, len2, len3, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))) {
	len1 = __stringSize(self);
	len2 = __stringSize(string1);
	len3 = __stringSize(string2);
	sz = OHDR_SIZE + len1 + len2 + len3 + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, len1);
	    bcopy(__stringVal(string1), dstp + len1, len2);
	    bcopy(__stringVal(string2), dstp + len1 + len2, len3+1);
	    *(dstp + len1 + len2 + len3) = '\0';
#else
# ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), len1);
	    strncpy(dstp + len1, __stringVal(string1), len2);
	    strncpy(dstp + len1 + len2, __stringVal(string2), len3);
	    *(dstp + len1 + len2 + len3) = '\0';
# else
	    srcp = __stringVal(self);
	    while (len1--) *dstp++ = *srcp++;
	    srcp = __stringVal(string1);
	    while (len2--) *dstp++ = *srcp++;
	    srcp = __stringVal(string2);
	    while (len3--) *dstp++ = *srcp++;
	    *dstp = '\0';
# endif
#endif
	    RETURN ( newString );
	}
    }
%}.
    ^ super , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected"

    |newString|

%{
    int len1, len2, len3, len4, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))
     && (__isString(string3) || __isSymbol(string3))) {
	len1 = __stringSize(self);
	len2 = __stringSize(string1);
	len3 = __stringSize(string2);
	len4 = __stringSize(string3);
	sz = OHDR_SIZE + len1 + len2 + len3 + len4 + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, len1);
	    bcopy(__stringVal(string1), dstp + len1, len2);
	    bcopy(__stringVal(string2), dstp + len1 + len2, len3);
	    bcopy(__stringVal(string3), dstp + len1 + len2 + len3, len4+1);
	    *(dstp + len1 + len2 + len3 + len4) = '\0';
#else
# ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), len1);
	    strncpy(dstp + len1, __stringVal(string1), len2);
	    strncpy(dstp + len1 + len2, __stringVal(string2), len3);
	    strncpy(dstp + len1 + len2 + len3, __stringVal(string3), len4);
	    *(dstp + len1 + len2 + len3 + len4) = '\0';
# else
	    srcp = __stringVal(self);
	    while (len1--) *dstp++ = *srcp++;
	    srcp = __stringVal(string1);
	    while (len2--) *dstp++ = *srcp++;
	    srcp = __stringVal(string2);
	    while (len3--) *dstp++ = *srcp++;
	    srcp = __stringVal(string3);
	    while (len4--) *dstp++ = *srcp++;
	    *dstp = '\0';
# endif
#endif
	    RETURN ( newString );
	}
    }
%}.
    ^ super , string1 , string2 , string3
!

copyFrom:start
    "return the substring from start, anInteger to the end.
     This method will always return a string, even if the receiver 
     is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1, sz;
    OBJ newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__isSmallInteger(start)
     && ((myClass==String) || (myClass==Symbol))) {
	len = __stringSize(self);
	index1 = __intVal(start);
	if (index1 > 0) {
	    if (index1 <= len) {
		count = len - index1 + 1;
		sz = OHDR_SIZE + count + 1;

		__PROTECT_CONTEXT__
		__qNew(newString, sz);  /* OBJECT ALLOCATION */
		__UNPROTECT_CONTEXT__

		if (newString != nil) {
		    __InstPtr(newString)->o_class = String;
		    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(__stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
# ifdef FAST_STRCPY
		    strncpy(dstp, __stringVal(self) + index1 - 1, count);
		    dstp[count] = '\0';
# else
		    srcp = __stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
    }
#endif
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start
!

copyFrom:start to:stop
    "return the substring starting at index start, anInteger and ending
     at stop, anInteger. This method will always return a string, even
     if the receiver is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, sz, index1, index2;
    OBJ newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)
     && ((myClass==String) || (myClass==Symbol))) {
	len = __stringSize(self);
	index1 = __intVal(start);
	index2 = __intVal(stop);

	if ((index1 <= index2) && (index1 > 0)) {
	    if (index2 <= len) {
		count = index2 - index1 + 1;
		sz = OHDR_SIZE + count + 1;

		__PROTECT_CONTEXT__
		__qNew(newString, sz);  /* OBJECT ALLOCATION */
		__UNPROTECT_CONTEXT__

		if (newString != nil) {
		    __InstPtr(newString)->o_class = String;
		    dstp = __stringVal(newString);
		    srcp = __stringVal(self) + index1 - 1;
#ifdef bcopy4
		    {
			int nw = count >> 2;

			if (count & 3) {
			    nw++;
			}
			bcopy4(srcp, dstp, nw);
			dstp[count] = '\0';
		    }
#else
# ifdef FAST_MEMCPY
		    bcopy(srcp, dstp, count);
		    dstp[count] = '\0';
# else
#  ifdef FAST_STRCPY
		    strncpy(dstp, srcp, count);
		    dstp[count] = '\0';
#  else
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
#  endif
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
	/*
	 * allow empty copy
	 */
	if (index1 > index2) {
	    __PROTECT_CONTEXT__
	    __qNew(newString, OHDR_SIZE+1);     /* OBJECT ALLOCATION */
	    __UNPROTECT_CONTEXT__
	    if (newString != nil) {
		__InstPtr(newString)->o_class = String;
		(__stringVal(newString))[0] = '\0';
		RETURN ( newString );
	    }
	}
    }
#endif
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start to:stop
!

copyWith:aCharacter
    "return a new string containing the receivers characters
     and the single new character, aCharacter. 
     This is different from concatentation, which expects another string
     as argument, but equivalent to copy-and-addLast.
     Reimplemented here for more speed"

%{  /* NOCONTEXT */

    int count;
    int sz;
    REGISTER unsigned char *dstp;
    OBJ cls, newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__isCharacter(aCharacter)
     && ((myClass==String) || (myClass==Symbol))) {
	count = __stringSize(self);
	sz = OHDR_SIZE + count + 1 + 1;

	__PROTECT_CONTEXT__
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	__UNPROTECT_CONTEXT__

	if (newString) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);

#ifdef bcopy4
	    {
		int nw = count >> 2;
		char *srcp = (char *)__stringVal(self);

		if (count & 3) {
		    nw++;
		}
		bcopy4(srcp, dstp, nw);
		dstp += count;
	    }
#else
# ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, count);
	    dstp += count;
# else
#  ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), count);
	    dstp += count;
#  else
	    {
		REGISTER unsigned char *srcp;

		srcp = __stringVal(self);
		while ((*dstp = *srcp++) != '\0')
		    dstp++;
	    }
#  endif
# endif
# endif
	    *dstp++ = __intVal(_characterVal(aCharacter));
	    *dstp = '\0';
	    RETURN (newString );
	}
    }
#endif
%}.
    "fall back in case of non-character arg;
     will eventually lead to an bad element signal raise"

    ^ super copyWith:aCharacter
!

deepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

deepCopyUsing:aDictionary
    "return a copy of the receiver - reimplemented to be a bit faster"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

shallowCopy
    "return a copy of the receiver"

    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super shallowCopy
!

simpleDeepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
! !

!String methodsFor:'filling & replacing'!

atAllPut:aCharacter
    "replace all elements with aCharacter
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#ifndef FAST_MEMSET
    REGISTER unsigned char *dst;
    REGISTER int l;
#endif
    REGISTER int byteValue;

    if (__isCharacter(aCharacter) && __isString(self)) {
	byteValue = __intVal(_characterVal(aCharacter));

#ifdef FAST_MEMSET 
	memset(__stringVal(self), byteValue, __stringSize(self));
#else
	{
	    INT v;

	    v = (byteValue << 8) | byteValue;
	    v = (v << 16) | v;

	    dst = __stringVal(self);
	    l = __stringSize(self);

# ifdef FAST_MEMSET4 /* sorry intel: your stosd instruction is slower ... */
	    memset4(dst, v, l>>2);
	    l = l & 3;
# else
#  ifdef UINT64
	    {
		UINT64 v64;

	        v64 = v;
		v64 = (v64 << 32) | v;
	        while (l >= 8) {
		    ((UINT64 *)dst)[0] = v64;
		    dst += 8;
		    l -= 8;
		}
	    }
#  else /* no UINT64 */
	    while (l >= 16) {
		((int *)dst)[0] = v;
		((int *)dst)[1] = v;
		((int *)dst)[2] = v;
		((int *)dst)[3] = v;
		dst += 16;
		l -= 16;
	    }
	    if (l >= 8) {
		((int *)dst)[0] = v;
		((int *)dst)[1] = v;
		dst += 8;
		l -= 8;
	    }
	    if (l >= 4) {
		((int *)dst)[0] = v;
		dst += 4;
		l -= 4;
	    }
#   if 0
	    if (l >= 2) {
		((short *)dst)[0] = v;
		dst += 2;
		l -= 2;
	    }
#   endif

#  endif
# endif
	}

	/*
	 * remaining bytes
	 */
	while (l--)
	    *dst++ = byteValue;

#endif /* no FAST_MEMSET */

	RETURN ( self );
    }
%}.
    ^ super atAllPut:aCharacter

    "
     (String new:10) atAllPut:$*      
     String new:10 withAll:$*     
    "
!

replaceAll:oldCharacter with:newCharacter
    "replace all oldCharacters by newCharacter in the receiver.

     Notice: This operation modifies the receiver, NOT a copy;
     therefore the change may affect all others referencing the receiver."

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned oldVal, newVal;
    unsigned char c, cNext;

    if (__isCharacter(oldCharacter)
     && __isCharacter(newCharacter)
     && __isString(self)) {
	srcp = (unsigned char *)__stringVal(self);
	oldVal = __intVal(_characterVal(oldCharacter));
	newVal = __intVal(_characterVal(newCharacter));

	cNext = *srcp;
	while ((c = cNext) != '\0') {
	    cNext = srcp[1];
	    if (c == oldVal)
		*srcp = newVal;
	    srcp++;
	}
	RETURN ( self );
    }
%}.
    ^ super replaceAll:oldCharacter with:newCharacter

    "
     'helloWorld' copy replaceAll:$o with:$O   
     'helloWorld' copy replaceAll:$d with:$*   
     'helloWorld' copy replaceAll:$h with:$*   
    "
!

replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Return the receiver.

     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp, *dstp;
    REGISTER int count;
    int len, index1, index2;
    int repLen, repIndex;

#ifndef NO_PRIM_STRING
    if ((__isString(aString) || __isSymbol(aString))
     && __isString(self)
     && __bothSmallInteger(start, stop)) {
	len = __stringSize(self);
	index1 = __intVal(start);
	index2 = __intVal(stop);
	count = index2 - index1 + 1;
	if (count <= 0) {
	     RETURN (self);
	}
	if ((index2 <= len) && (index1 > 0)) {
	    repLen = __stringSize(aString);
	    repIndex = __intVal(repStart);
	    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
		srcp = __stringVal(aString) + repIndex - 1;
		dstp = __stringVal(self) + index1 - 1;
		if (aString == self) {
		    /* take care of overlapping copy */
		    if (srcp < dstp) {
			/* must do a reverse copy */
			srcp += count;
			dstp += count;
			while (count-- > 0) {
			    *--dstp = *--srcp;
			}
			RETURN (self);
		    }
		}
#ifdef bcopy4
		/* copy quadbytes if pointers are aligned */
		/* 
		 * no sizeof(int) here please -
		 * - bcopy4 (if defined) copies 4-bytes on ALL machines 
		 */
		if ((count > 12)
		 && (((unsigned INT)srcp & 3) == 0)
		 && (((unsigned INT)dstp & 3) == 0)) {
		    int n;

		    n = count >> 2;        /* make it quads */
		    bcopy4(srcp, dstp, n);
		    n <<= 2;               /* back to chars */
		    dstp += n;
		    srcp += n;
		    count -= n;
		}
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
#else
# ifdef FAST_MEMCPY
		bcopy(srcp, dstp, count);
# else
		/* copy longs if pointers are aligned */
		if ((((unsigned INT)srcp & (sizeof(INT)-1)) == 0)
		 && (((unsigned INT)dstp & (sizeof(INT)-1)) == 0)) {
		    while (count >= sizeof(INT)) {
			*((unsigned INT *)dstp) = *((unsigned INT *)srcp);
			dstp += sizeof(INT);
			srcp += sizeof(INT);
			count -= sizeof(INT);
		    }
		}
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
# endif
#endif
		RETURN (self);
	    }
	}
    }
#endif
%}.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

!

reverse                                                                         
    "in-place reverse the characters of the string."

    "Q: is there a need to redefine it here ?"

%{  /* NOCONTEXT */

    REGISTER char c;
    REGISTER unsigned char *hip, *lowp;

    if (__isString(self)) {
	lowp = __stringVal(self);
	hip = lowp + __stringSize(self) - 1;
	while (lowp < hip) {
	    c = *lowp;
	    *lowp = *hip;
	    *hip = c;
	    lowp++;
	    hip--;
	}
	RETURN ( self );
    }
%}.
    ^ super reverse
!

withoutSeparators
    "return a string containing the chars of myself 
     without leading and trailing whitespace.
     If there is no whitespace, the receiver is returned.
     Notice, this is different from String>>withoutSpaces."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    startIndex := 0.

%{
    REGISTER unsigned char *cp;
    REGISTER unsigned char *ep;
    REGISTER unsigned char c;
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *ep0;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = __stringVal(self);

	/*
	 * find first non-whiteSpace from beginning
	 */
#ifndef NON_ASCII
# ifdef UINT64
	while (*((UINT64 *)cp) == 0x2020202020202020L) {
	    cp += 8;
	}
# endif
	while (*((unsigned *)cp) == 0x20202020) {
	    cp += 4;
	}
#endif
	while ((c = *cp)
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	 && (c <= ' ')
#endif
	 && ((c == ' ') || (c == '\n') || (c == '\t')
			|| (c == '\r') || (c == '\f'))
	) {
	    cp++;
	}

	/*
	 * find first non-whiteSpace from end
	 */
	ep = ep0 = cp0 + __stringSize(self) - 1;
	while ((ep >= cp) && (*ep == ' ')) ep--;
	c = *ep;
	while ((ep >= cp) &&
#ifndef NON_ASCII
	       (c <= ' ') &&
#endif
	       ((c == ' ') || (c == '\n') || (c == '\t')
			   || (c == '\r') || (c == '\f'))) {
	    ep--;
	    c = *ep;
	}

	/*
	 * no whiteSpace ?
	 */
	if ((cp == cp0) && (ep == ep0)) {
	    RETURN(self);
	}

	startIndex = __MKSMALLINT(cp - cp0 + 1);
	endIndex = __MKSMALLINT(ep - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSeparators].

    startIndex > endIndex ifTrue:[^ ''].
    ^ self copyFrom:startIndex to:endIndex

    "
     'hello' withoutSeparators    
     '    hello' withoutSeparators    
     '    hello ' withoutSeparators   
     '    hello  ' withoutSeparators  
     '    hello   ' withoutSeparators 
     '    hello    ' withoutSeparators
     '        ' withoutSeparators       
    "

!

withoutSpaces
    "return a string containing the characters of myself 
     without leading and trailing spaces.
     If there are no spaces, the receiver is returned unchanged.
     Notice, this is different from String>>withoutSeparators."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz blank|

    startIndex := 0.
%{
    REGISTER unsigned char *cp;
    REGISTER unsigned char *ep;
    unsigned char *cp0;
    unsigned char *ep0;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = __stringVal(self);

	/*
	 * find first non-blank from beginning
	 */
#ifndef NON_ASCII
# ifdef UINT64
	while (*((UINT64 *)cp) == 0x2020202020202020L) {
	    cp += 8;
	}
# endif /* UINT64 */
	while (*((unsigned *)cp) == 0x20202020) {
	    cp += 4;
	}
#endif
	while (*cp == ' ') cp++;

	/*
	 * find first non-blank from end
	 */
	ep = ep0 = cp0 + __stringSize(self) - 1;
	while ((ep >= cp) && (*ep == ' ')) ep--;

	/*
	 * no blanks ?
	 */
	if ((cp == cp0) && (ep == ep0)) {
	    RETURN(self);
	}
        
	startIndex = __MKSMALLINT(cp - cp0 + 1);
	endIndex = __MKSMALLINT(ep - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSpaces].

    startIndex > endIndex ifTrue:[^ ''].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[^ self].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    hello' withoutSpaces    
     '    hello ' withoutSpaces   
     '    hello  ' withoutSpaces  
     '    hello   ' withoutSpaces 
     '    hello    ' withoutSpaces
     '        ' withoutSpaces       
    "
! !

!String methodsFor:'printing & storing'!

isLiteral
    "return true, if the receiver can be used as a literal
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print the receiver on standard output.
     This method does NOT use the stream classes and will therefore work
     even in case of emergency."

%{  /* NOCONTEXT */

    if (__qClass(self) == String) {
#ifdef WIN32
	fprintf(stdout, "%s" , __stringVal(self));
#else
	fputs(__stringVal(self), stdout); fflush(stdout);
#endif
	RETURN (self);
    }
%}.
    ^ super print
!

printCR
    "print the receiver on standard output, followed by a cr.
     This method does NOT use the stream classes and will therefore work
     even in case of emergency."

%{  /* NOCONTEXT */

    if (__qClass(self) == String) {
#ifdef WIN32
	fprintf(stderr, "%s\n" , __stringVal(self));
#else
	fputs(__stringVal(self), stdout); fputs("\n", stdout); fflush(stdout);
#endif
	RETURN (self);
    }
%}.
    ^ super printCR
!

printfPrintString:formatString
    "non-portable but sometimes useful.
     Return a printed representation of the receiver as specified by formatString, 
     which is defined by printf.
     No checking on buffer overrun is done; the resulting string may not
     be larger than 799 characters.
     This method is NONSTANDARD and may be removed without notice."

%{  /* STACK: 1000 */

    char buffer[800];
    char *cp;
    OBJ s;

    if (__isString(formatString)) {
	/*
	 * actually only needed on sparc: since thisContext is
	 * in a global register, which gets destroyed by printf,
	 * manually save it here - very stupid ...
	 */

	cp = (char *)__stringVal(self);
	if (__qClass(self) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(__qClass(self))->c_ninstvars));

	__BEGIN_PROTECT_REGISTERS__

	sprintf(buffer, (char *)__stringVal(formatString), cp);

	__END_PROTECT_REGISTERS__

	s = __MKSTRING(buffer COMMA_SND);
	if (s != nil) {
	    RETURN (s);
	}
    }
%}.
    self primitiveFailed

    "
     'hello' printfPrintString:'%%s -> %s'     
     'hello' printfPrintString:'%%10s -> %10s'  
     'hello' printfPrintString:'%%-10s -> %-10s' 
    "
!

storeOn:aStream
    "put the storeString of myself on aStream"

    aStream nextPut: $'.
    (self includes:$') ifTrue:[
	self printWithQuotesDoubledOn:aStream
    ] ifFalse:[
	aStream nextPutAll:self
    ].
    aStream nextPut:$'

    "Modified: / 15.6.1998 / 17:21:51 / cg"
!

storeString
    "return a String for storing myself"

    |s n index|

    n := self occurrencesOf:$'.
    n == 0 ifFalse:[
	s := String new:(n + 2 + self size).
	s at:1 put:$'.
	index := 2.
	self do:[:thisChar |
	    (thisChar == $') ifTrue:[
		s at:index put:thisChar.
		index := index + 1.
	    ].
	    s at:index put:thisChar.
	    index := index + 1.
	].
	s at:index put:$'.
	^ s
    ].
    ^ '''' , self , ''''
! !

!String methodsFor:'queries'!

bitsPerCharacter
    "return the number of bits each character has.
     Here, 8 is returned (storing single byte characters)."

    ^ 8

    "Modified: 20.4.1996 / 23:08:42 / cg"
!

isEmpty
    "return true if the receiver is empty (i.e. if size == 0)
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ cls;

    cls = __qClass(self);
    if ((cls == String) || (cls == Symbol)) {
	RETURN ( (__stringSize(self) == 0) ? true : false);
    }
%}.
    ^ self size == 0
!

knownAsSymbol
    "return true, if there is a symbol with same characters in the
     system.
     Can be used to check for existance of a symbol without creating one"

%{  /* NOCONTEXT */
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
	indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
	indx = 0;
    }
    RETURN ( __KNOWNASSYMBOL(__stringVal(self) + indx) );
%}

    "
     'hello' knownAsSymbol     
     'fooBarBaz' knownAsSymbol     
    "
!

notEmpty
    "return true if the receiver is not empty (i.e. if size ~~ 0)
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ cls;

    cls = __qClass(self);
    if ((cls == String) || (cls == Symbol)) {
	RETURN ( (__stringSize(self) != 0) ? true : false);
    }
%}.
    ^ self size ~~ 0
! !

!String methodsFor:'regular expression matching'!

asRegex
    "Compile the receiver as a regex matcher. May raise RxParser>>syntaxErrorSignal
    or RxParser>>compilationErrorSignal.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^ Regex::RxParser preferredMatcherClass for: (Regex::RxParser new parse: self)

    "Modified: 3.6.1997 / 11:25:25 / cg"
!

matchesRegex: regexString
    "Test if the receiver matches a regex.  May raise RxParser>>regexErrorSignal or
    child signals.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matches: self

    "Modified: 3.6.1997 / 11:25:30 / cg"
!

prefixMatchesRegex: regexString
    "Test if the receiver's prefix matches a regex. 
    May raise RxParser class>>regexErrorSignal or child signals.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matchesPrefix: self

    "Modified: 3.6.1997 / 11:25:34 / cg"
! !

!String methodsFor:'testing'!

endsWith:aStringOrChar
    "return true, if the receiver ends with something, aStringOrChar."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    unsigned char c;
    REGISTER OBJ slf = self;

    if ((__isString(slf) || __isSymbol(slf))
     && (__isString(aStringOrChar) || __isSymbol(aStringOrChar))) {
        len1 = __qSize(slf);
        len2 = __qSize(aStringOrChar);
        if (len1 < len2) {
            RETURN ( false );
        }

        src1 = __stringVal(slf) + (len1 - len2);
        src2 = __stringVal(aStringOrChar);
        while (c = *src2++) {
            if (c != *src1++) {
                RETURN ( false );
            }
        }
        RETURN (true);
    }
    if (__isCharacter(aStringOrChar)) {
        int val;

        val = __intVal(_characterVal(aStringOrChar));
        if ((unsigned)val <= 255) {
            len1 = __stringSize(slf);
            if (len1 > 0) {
                RETURN ( (__stringVal(slf)[len1-1] == val) ? true : false);
            }
        }
        RETURN ( false );
    }
%}.
    ^ super endsWith:aStringOrChar

    "
     'hello world' endsWith:'world'
     'hello world' endsWith:'earth'
     'hello world' endsWith:$d   
     'hello world' endsWith:$e   
     '' endsWith:$d            
     'hello world' endsWith:#($r $l $d)
     'hello world' endsWith:''   
    "
!

isBlank
    "return true, if the receivers size is 0 or if it contains only spaces.
     Q: should we care for whiteSpace in general here ?"

%{  /* NOCONTEXT */

    REGISTER unsigned char *src;
    REGISTER unsigned char c;
    OBJ cls;

    src = __stringVal(self);
    if ((cls = __qClass(self)) != String)
	src += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

#ifndef NON_ASCII
# ifdef UINT64
    while (*((UINT64 *)src) == 0x2020202020202020L) {
	src += 8;
    }
# endif /* UINT64 */

    while (*((unsigned *)src) == 0x20202020) {
	src += 4;
    }
#endif /* ascii */

    while (c = *src++) {
	if (c != ' ') {
	    RETURN ( false );
	}
    }
%}.
    ^ true
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. arguments are the costs for
     substitution, case-change, insertion and deletion of a character."

%{  /* STACK: 2000 */

    /* 
     * this is very heavy used when correcting errors 
     * (all symbols are searched for best match) - therefore it must be fast
     */
{
    unsigned short *data;
    int l1, l2;
    REGISTER int sz;
    unsigned char *s1, *s2;
    int v1, v2, v3, m;
    REGISTER unsigned short *dp;
    REGISTER int delta;
    REGISTER int j;
    int i;
    int iW, cW, sW, dW;
#   define FASTSIZE 30  /* increase STACK if you increase this ... */
    unsigned short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))
     && __bothSmallInteger(insrtWeight, caseWeight)
     && __bothSmallInteger(substWeight, deleteWeight)) {
	iW = __intVal(insrtWeight);
	cW = __intVal(caseWeight);
	sW = __intVal(substWeight);
	dW = __intVal(deleteWeight);
	s1 = __stringVal(self);
	s2 = __stringVal(aString);
	l1 = strlen(s1);
	l2 = strlen(s2);

	sz = (l1 < l2) ? l2 : l1;
	delta = sz + 1;
	if (sz <= FASTSIZE) {
	    data = fastData;
	} else {
	    /* add ifdef ALLOCA here ... */
	    data = (unsigned short *)malloc(delta * delta * sizeof(short));
	    if (! data) goto mallocFailed;
	}

	data[0] = 0;
	dp = data+1;
	for (j=1, dp=data+1; j<=sz; j++, dp++)
	    *dp = *(dp-1) + iW;

	for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
	    *dp = *(dp-delta) + dW;

	for (i=1; i<=l1; i++) {
	    for (j=1; j<=l2; j++) {
		dp = data + (i*delta) + j;
		if (s1[i] != s2[j]) {
		    if (tolower(s1[i]) == tolower(s2[j])) {
			m = cW;
		    } else {
			m = sW;
		    }
		} else
		    m = 0;

		v2 = *(dp - 1) + iW;
		v3 = *(dp - delta) + dW;
		v1 = *(dp - delta - 1) + m;
		if (v1 < v2)
		    if (v1 < v3)
			m = v1;
		    else
			m = v3;
		else
		    if (v2 < v3)
			m = v2;
		    else
			m = v3;
		*dp = m;
	    }
	}
	m = data[l1 * delta + l2];
	if (sz > FASTSIZE) 
	    free(data);
	RETURN ( __MKSMALLINT(m) );
    }
mallocFailed: ;
}
%}.

    ^ super levenshteinTo:aString 
			s:substWeight c:caseWeight 
			i:insrtWeight d:deleteWeight

    "'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'"
!

startsWith:aStringOrChar
    "return true, if the receiver starts with something, aStringOrChar.
     If the argument is empty, true is returned."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    unsigned char c;
    REGISTER OBJ slf = self;

    if (((__qClass(slf)==String) || (__qClass(slf)==Symbol))
     && __isNonNilObject(aStringOrChar)
     && ((__qClass(aStringOrChar)==String) || (__qClass(aStringOrChar)==Symbol))) {
        src1 = __stringVal(slf);
        src2 = __stringVal(aStringOrChar);

        if (src1[0] != src2[0]) {
            if (__qSize(aStringOrChar) == (OHDR_SIZE+1) /* 1 for the 0-byte */) {
                RETURN (true);
            }
            RETURN ( false );
        }

        len1 = __qSize(slf);
        len2 = __qSize(aStringOrChar);
        if (len1 < len2) {
            RETURN ( false );
        }

#ifdef UINT64
        while (len2 > (OHDR_SIZE+sizeof(UINT64))) {
            if ( ((UINT64 *)src1)[0] != ((UINT64 *)src2)[0] ) {
                RETURN (false);
            }
            len2 -= sizeof(UINT64);
            src1 += sizeof(UINT64);
            src2 += sizeof(UINT64);
        }
#else
# ifdef UNROLL_LOOPS
        while (len2 > (OHDR_SIZE+sizeof(INT)*4)) {
            if ( ((unsigned INT *)src1)[0] != ((unsigned INT *)src2)[0]) {
                RETURN (false);
            }
            if ( ((unsigned INT *)src1)[1] != ((unsigned INT *)src2)[1]) {
                RETURN (false);
            }
            if ( ((unsigned INT *)src1)[2] != ((unsigned INT *)src2)[2]) {
                RETURN (false);
            }
            if ( ((unsigned INT *)src1)[3] != ((unsigned INT *)src2)[3]) {
                RETURN (false);
            }
            len2 -= sizeof(INT)*4;
            src1 += sizeof(INT)*4;
            src2 += sizeof(INT)*4;
        }
# endif /* UNROLL_LOOPS */
#endif /* UINT64 */

        while (len2 > (OHDR_SIZE+sizeof(INT))) {
            if ( ((unsigned INT *)src1)[0] != ((unsigned INT *)src2)[0]) {
                RETURN (false);
            }
            len2 -= sizeof(INT);
            src1 += sizeof(INT);
            src2 += sizeof(INT);
        }

        while (c = *src2++) {
            if (c != *src1) {
                RETURN ( false );
            }
            src1++;
        }
        RETURN (true);
    }
    if (__isCharacter(aStringOrChar)) {
        int val;

        val = __intVal(_characterVal(aStringOrChar));
        if ((unsigned)val <= 255) {
            len1 = __stringSize(slf);
            if (len1 > 0) {
                RETURN ( (__stringVal(slf)[0] == val) ? true : false);
            }
        }
        RETURN ( false );
    }
%}.
    ^ super startsWith:aStringOrChar

    "
     'hello world' startsWith:'hello'
     'hello world' startsWith:'hella'
     'hello world' startsWith:'hi'      
     'hello world' startsWith:$h   
     'hello world' startsWith:$H   
     'hello world' startsWith:(Character value:16rFF00)   
     'hello world' startsWith:60                          
     'hello world' startsWith:#($h $e $l)
     'hello world' startsWith:''   
    "
! !

!String class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/String.st,v 1.141 1999-07-15 13:44:00 cg Exp $'
! !