String.st
author Claus Gittinger <cg@exept.de>
Fri, 23 Jun 2000 10:21:17 +0200
changeset 5407 d6729266a95b
parent 5312 ac5719fafe43
child 5556 1056cc5d6ce0
permissions -rw-r--r--
*** empty log message ***

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"{ Package: 'stx:libbasic' }"

CharacterArray variableByteSubclass:#String
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text'
!

!String primitiveDefinitions!
%{

#include <stdio.h>
#define _STDIO_H_INCLUDED_

#include <ctype.h>

#ifdef LINUX
# define __STRINGDEFS__
# include <linuxIntern.h>
#else
  extern char *strchr();
#endif

/*
 * old st/x creates strings with spaces in it;
 * new st/x will fill it with zeros (for st-80 compatibility)
 * the define below sets old behavior.
 */
#define INITIALIZE_WITH_SPACE
%}
! !

!String class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Strings are ByteArrays storing Characters.

    Strings are kind of kludgy: to allow for easy handling by c-functions,
    there is always one 0-byte added at the end, which is not counted
    in the strings size, and is not accessable from the smalltalk level.
    This guarantees, that a smalltalk string can always be passed to a
    C- or a system api function without danger (of course, this does not
    prevent a nonsense contents ...)

    You cannot add any instvars to String, since the the run time system & compiler
    creates literal strings and knows that strings have no named instvars.
    If you really need strings with instVars, you have to create a subclass
    of String (the access functions defined here can handle this).
    A little warning though: not all smalltalk systems allow subclassing String,
    so your program may become unportable if you do so.

    [author:]
        Claus Gittinger

    [see also:]
        Text StringCollection TwoByteString JISEncodedString
        Symbol
"
! !

!String class methodsFor:'instance creation'!

basicNew:anInteger
    "return a new empty string with anInteger characters.
     In contrast to other smalltalks, this returns a string filled
     with spaces (instead of a string filled with 0-bytes).
     This makes much more sense, in that a freshly created string
     can be directly used as separator or for formatting."

%{  /* NOCONTEXT */

    OBJ newString;
    REGISTER int len;
    REGISTER unsigned char *cp;
    REGISTER OBJ *op;
    int nInstVars, instsize;

    if (__isSmallInteger(anInteger)) {
        len = __intVal(anInteger);
        if (len >= 0) {
            instsize = OHDR_SIZE + len + 1;
            if (self == String) {
                if (__CanDoQuickNew(instsize)) {        /* OBJECT ALLOCATION */
                    /*
                     * the most common case
                     */
                    __qCheckedNew(newString, instsize);
                    __InstPtr(newString)->o_class = self; /* no need for PROTECT - there was no GC */
                    __qSTORE(newString, self);

                    cp = __stringVal(newString);

#if defined(memset4) && !defined(NON_ASCII)
                    {
                        /* 
                         * no sizeof(int) here please -
                         * - memset4 (if defined) fills 4-bytes on ALL machines 
                         */
                        int l4 = len >> 2;

                        if (len & 3) l4++;
                        memset4(cp, 0x20202020, l4);
                        cp[len] = '\0';
                    }
#else
# ifdef FAST_MEMSET
                    memset(cp, ' ', len);
                    cp[len] = '\0';
# else
                    while (len >= 8) {
#  ifndef NON_ASCII       /* i.e. EBCDIC  */
#   ifdef INT64
                        ((INT64 *)cp)[0] = 0x2020202020202020L;
#   else
                        ((int *)cp)[0] = 0x20202020;
                        ((int *)cp)[1] = 0x20202020;
#   endif
#  else
                        cp[0] = cp[1] = cp[2] = cp[3] = ' ';
                        cp[4] = cp[5] = cp[6] = cp[7] = ' ';
#  endif
                        cp += 8; 
                        len -= 8;
                    }
                    while (len--)
                        *cp++ = ' ';
                    *cp = '\0';
# endif /* not FAST_MEMSET */
#endif /* not memset4 */

                    RETURN (newString);
                }
                nInstVars = 0;
            } else {
                nInstVars = __intVal(__ClassInstPtr(self)->c_ninstvars);
                instsize += __OBJS2BYTES__(nInstVars);
            }

            __PROTECT_CONTEXT__;
            __qNew(newString, instsize);        /* OBJECT ALLOCATION */
            __UNPROTECT_CONTEXT__;

            if (newString == nil) goto fail;

            __InstPtr(newString)->o_class = self;
            __qSTORE(newString, self);

            if (nInstVars) {
                /*
                 * nil-out instvars
                 */
#if defined(memset4)
                memset4(__InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
                /*
                 * knowing that nil is 0
                 */
                memset(__InstPtr(newString)->i_instvars, 0, __OBJS2BYTES__(nInstVars));
# else
                op = __InstPtr(newString)->i_instvars;
                do {
                    *op++ = nil;
                } while (--nInstVars);
# endif
#endif
                cp = __stringVal(newString) + __OBJS2BYTES__(nInstVars);
            } else {
                cp = __stringVal(newString);
            }

            /*
             * fill with spaces
             */
#ifdef FAST_MEMSET
            memset(cp, ' ', len);
            *(cp + len) = '\0';
#else
            while (len >= 8) {
# ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
#  ifdef INT64
                ((INT64 *)cp)[0] = 0x2020202020202020L;
#  else
                ((int *)cp)[0] = 0x20202020;
                ((int *)cp)[1] = 0x20202020;
#  endif
# else
                cp[0] = cp[1] = cp[2] = cp[3] = ' ';
                cp[4] = cp[5] = cp[6] = cp[7] = ' ';
# endif
                cp += 8;
                len -= 8;
            }
            while (len--)
                *cp++ = ' ';
            *cp = '\0';
#endif
            RETURN (newString);
        }
    }
fail: ;;
%}.
    "
     invalid argument, or out-of-memory:
     use error handling in superclass
    "
    ^ (super basicNew:anInteger+1) atAllPut:(Character space)
!

new:n
    "return a new empty string with anInteger characters.
     In contrast to other smalltalks, this returns a string filled
     with spaces (instead of a string filled with 0-bytes).
     This makes much more sense, in that a freshly created string
     can be directly used as separator or for formatting."

    ^ self basicNew:n
!

readFrom:aStream onError:exceptionBlock
    "read & return the next String from the (character-)stream aStream;
     skipping all whitespace first; return the value of exceptionBlock,
     if no string can be read. The sequence of characters as read from the 
     stream must be one as stored via storeOn: or storeString."

    |collected char|

    "
     this method is not to be inherited
     (i.e. not ok for subclasses; Symbol, for example)
    "
    self ~~ String ifTrue:[
	^ super readFrom:aStream onError:exceptionBlock
    ].

    "skip whiteSpace"
    aStream skipSeparators.
    (aStream next == $') ifTrue:[
	collected := WriteStream on:(String new).
	[true] whileTrue:[
	    aStream atEnd ifTrue:[
		"/ mhmh - reached the end without a closing quote
		"/ looks like an error to me ...
		^ exceptionBlock value
	    ].
	    char := aStream next.
	    char == $' ifTrue:[
		"/ look for another quote
		aStream peekOrNil == $' ifFalse:[
		    ^ collected contents
		].
		aStream next.
	    ].
	    collected nextPut:char
	]
    ].
    ^ exceptionBlock value

    "
     String readFrom:('''hello world''' readStream) 
     String readFrom:('''hello '''' world''' readStream) 
     String readFrom:('1 ''hello'' ' readStream)
     String readFrom:('1 ''hello'' ' readStream) onError:['foobar']  
    "

    "Modified: / 14.4.1998 / 18:46:26 / cg"
!

unititializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end"

    ^ self basicNew:anInteger
! !

!String class methodsFor:'Compatibility - Squeak'!

cr
    "return a string consisting of the cr-Character"

    ^ Character cr asString

    "Modified: / 13.11.1999 / 13:53:36 / cg"
! !

!String class methodsFor:'binary storage'!

binaryDefinitionFrom:stream manager:manager
    "read a binary representation from stream. This is only invoked
     for long strings. Short strings are stored with 1byte length."

    |s len|

    "take care of subclasses ..."
    ((self == String) or:[self == Symbol]) ifTrue:[
	len := stream nextNumber:4.
	s := String basicNew:len.
	stream nextBytes:len into:s startingAt:1.
	^ s
    ].
    ^ super binaryDefinitionFrom:stream manager:manager

    "Modified: / 2.11.1997 / 16:18:37 / cg"
! !

!String class methodsFor:'queries'!

defaultPlatformClass
    "dummy for ST-80 compatibility"

    ^ self

    "Created: 6.6.1997 / 18:25:56 / cg"
!

isBuiltInClass
    "return true if this class is known by the run-time-system.
     Here, true is returned for myself, false for subclasses."

    ^ self == String

    "Modified: 23.4.1996 / 16:00:38 / cg"
! !

!String methodsFor:'Compatibility - CampSmalltalk'!

asQualifiedReference
    ^ QualifiedName for:self
! !

!String methodsFor:'ID extensions'!

ideCharAt: anInteger
	"Return the character stored at anInteger. Will convert to
	charAt() in Java."

	^self at: anInteger!

ideLength
	"Return the receiver's length. Will convert to
	length() in Java."

	^self size!

idePrintPrimitive
	"Return a string representation of the receiver. This
	calls #asString in Smalltalk (to strip out the single quote
	characters) and nothing in Java (see implementation 
	in Object)."

	^self asString!

ideStringTest: aString
	"Return true if the receiver equals aString,
	else return false. This translates to equals() in 
	Java. Identity testing two symbols is 50% faster 
	on IBM Smalltalk than this method, however
	sending asSymbol once is 850 times slower than
	sending = once.

	Use == if you know both are Symbols, or this method 
	if you know both are Strings. This method is usually 
	faster overall and provides better unicode string 
	matching support in Java."

	^self = aString asString! !

!String methodsFor:'accessing'!

at:index
    "return the character at position index, an Integer.
     Reimplemented here to avoid the additional at:->basicAt: send
     (which we can do here, since at: is obviously not redefined in a subclass).
     This method is the same as at:."

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ slf, cls;

    if (__isSmallInteger(index)) {
        slf = self;
        cls = __qClass(slf);
        indx = __intVal(index) - 1;
        if (cls != String) {
            if (indx < 0) goto badIndex;
            indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
        }
        if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
            RETURN ( __MKCHARACTER(__stringVal(slf)[indx] & 0xFF) );
        }
    }
badIndex: ;
%}.
    ^ self basicAt:index
!

at:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer.
     Return aCharacter (sigh).
     Reimplemented here to avoid the additional at:put:->basicAt:put: send
     (but only for Strings, since subclasses may redefine basicAt:put:).
     This method is the same as basicAt:put:."

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    REGISTER OBJ slf;

    slf = self;

    if (__isString(slf)) {
        if (__isCharacter(aCharacter)) {
            value = __intVal(_characterVal(aCharacter));
            if (((unsigned)value <= 0xFF)
             && __isSmallInteger(index)) {
                indx = __intVal(index) - 1;
                if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
                    __stringVal(slf)[indx] = value;
                    RETURN ( aCharacter );
                }
            }
        }
    }
%}.
    ^ self basicAt:index put:aCharacter
!

basicAt:index
    "return the character at position index, an Integer
     - reimplemented here since we return characters"

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ slf, cls;

    if (__isSmallInteger(index)) {
        slf = self;
        cls = __qClass(slf);
        indx = __intVal(index) - 1;
        if (cls != String) {
            if (indx < 0) goto badIndex;
            indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
        }
        if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
            RETURN ( __MKCHARACTER(__stringVal(slf)[indx] & 0xFF) );
        }
    }
badIndex: ;
%}.
    index isInteger ifFalse:[
        ^ self indexNotInteger:index
    ].
    index == super basicSize ifTrue:[
        ^ self subscriptBoundsError:index
    ].
    ^ Character value:(super basicAt:index)
!

basicAt:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer.
     Returns aCharacter (sigh).
     - reimplemented here since we store characters"

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    REGISTER OBJ slf;
    REGISTER OBJ cls;

    slf = self;

    if (__isCharacter(aCharacter)) {
        value = __intVal(_characterVal(aCharacter));
        if (((unsigned)value <= 0xFF)
         && __isSmallInteger(index)) {
            cls = __qClass(slf);
            indx = __intVal(index) - 1;
            if (cls != String) {
                if (indx < 0) goto badIndex;
                indx += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
            }
            if ((unsigned)indx < (unsigned)(__stringSize(slf))) {
                __stringVal(slf)[indx] = value;
                RETURN ( aCharacter );
            }
        }
    }
badIndex: ;
%}.
    (aCharacter isMemberOf:Character) ifFalse:[
        "
         tried to store something which is not a character
        "
        ^ self elementNotCharacter
    ].
    (aCharacter asciiValue between:1 and:255) ifFalse:[
        "
         tried to store a multibyte character
        "
        ^ self elementBoundsError
    ].
    "
     invalid index
    "
    index isInteger ifFalse:[
        ^ self indexNotInteger:index
    ].
    index == super basicSize ifTrue:[
        ^ self subscriptBoundsError:index
    ].
    super basicAt:index put:aCharacter asciiValue.
    ^ aCharacter 

!

basicSize
    "return the number of characters in myself.
     Redefined here to exclude the 0-byte at the end."

%{  /* NOCONTEXT */
    REGISTER OBJ slf, cls;

    slf = self;

    cls = __qClass(slf);
    if (cls == String) {
        RETURN ( __MKSMALLINT(__stringSize(slf)) );
    }
    RETURN ( __MKSMALLINT(__stringSize(slf)
                          - __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars))));
%}.
    ^ super basicSize - 1

!

size
    "return the number of characters in myself.
     Reimplemented here to avoid the additional size->basicSize send
     (which we can do here, since size is obviously not redefined in a subclass).
     This method is the same as basicSize."

%{  /* NOCONTEXT */
    REGISTER OBJ cls, slf;

    slf = self;
    cls = __qClass(slf);
    if (cls == String) {
        RETURN ( __MKSMALLINT(__stringSize(slf)) );
    }
    RETURN ( __MKSMALLINT(__stringSize(slf)
                         - __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars))));
%}.
    ^ self basicSize
! !

!String methodsFor:'binary storage'!

storeBinaryDefinitionOn:stream manager:manager
    "append a binary representation of the receiver onto stream.
     Redefined since short Strings can be stored with a special type code 
     in a more compact way.
     This is an internal interface for the binary storage mechanism."

    |myClass myBasicSize|

    "/ not for subclasses with named instVars.
    (myClass := self class) instSize ~~ 0 ifTrue:[
	^ super storeBinaryDefinitionOn:stream manager:manager
    ].

    myBasicSize := self basicSize.

    "/ can use a more compact representation;
    "/ but not for subclasses ...

    ((myClass == String) 
    and:[myBasicSize <= 255]) ifTrue:[
	"/ special encoding: <codeForString> <len> <bytes> ...
	stream nextPut:(manager codeForString); nextPut:myBasicSize.
    ] ifFalse:[
	manager putIdOfClass:myClass on:stream.
	stream nextNumber:4 put:myBasicSize.
    ].
    stream nextPutBytes:myBasicSize from:self startingAt:1.

    "Modified: / 2.11.1997 / 15:28:56 / cg"
! !

!String methodsFor:'character searching'!

identityIndexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
#ifdef FAST_STRCHR
    char *strchr();
#else
    REGISTER unsigned byteValue;
    REGISTER int index;
    char c;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
        cp = __stringVal(self);
        if ((cls = __qClass(self)) != String)
            cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
        cp = (unsigned char *) strchr(cp, __intVal(_characterVal(aCharacter)));
        if (cp) {
            RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
        }
#else
        byteValue = __intVal(_characterVal(aCharacter));
        index = 1;
        while (c = *cp++) {
            if (c == byteValue) { RETURN ( __MKSMALLINT(index) ); }
            index++;
        }
#endif
        RETURN ( __MKSMALLINT(0));
    }
    /* with identity compares, only characters can be in myself */
    RETURN ( __MKSMALLINT(0));
%}.
    ^ self primitiveFailed

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
     'hello world' indexOf:1                      
    "
!

includes:aCharacter
    "return true if the argument, aCharacter is included in the receiver
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	byteValue = __intVal(_characterVal(aCharacter));
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, __intVal(_characterVal(aCharacter)));
	if (cp) {
	    RETURN ( true );
	}
#else
	while (*cp) {
	    if (*cp == byteValue) {
		RETURN ( true );
	    }
	    cp++;
	}
#endif
	RETURN (false);
    }
%}.
    ^ super includes:aCharacter

    "
     'hello world' includes:$a
     'hello world' includes:$o  
     'hello world' includes:$x  
     'hello world' includes:1    
     'hello world' asTwoByteString includes:$o  
    "
!

includesAny:aCollection
    "return true, if the receiver includes any of the characters in the
     argument, aCollection.
     - redefined for speed if the argument is a String."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char *matchP;
    OBJ cls;

    if (__isString(aCollection)) {
        matchP = __stringVal(aCollection);
        cp = __stringVal(self);
        if ((cls = __qClass(self)) != String)
            cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

        while (*cp) {
            if (strchr(matchP, *cp)) {
                RETURN ( true );
            }
            cp++;
        }
        RETURN ( false );
    }
%}.
    ^ super includesAny:aCollection

    "
     'hello world' includesAny:'abcd'                      
     'hello world' includesAny:'xyz'                      
     'hello world' includesAny:(Array with:$a with:$b with:$d)   
     'hello world' includesAny:(Array with:$x with:$y)     
     'hello world' includesAny:(Array with:1 with:2)    
    "
!

indexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned byteValue;
    REGISTER int index;
#ifdef FAST_STRCHR
    char *strchr();
#else
    unsigned char c;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	byteValue = __intVal(_characterVal(aCharacter));
	if (byteValue == 0) {
	    index = strlen(cp);
	    if (index >= __stringSize(self)) {
		index = 0;
	    } else {
		index++;
	    }
	    RETURN ( __MKSMALLINT(index) );
	}

#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, byteValue);
	if (cp) {
	    RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
	}
#else
	index = 1;
	while (c = *cp++) {
	    if (c == byteValue) { RETURN ( __MKSMALLINT(index) ); }
	    index++;
	}
#endif
	RETURN ( __MKSMALLINT(0));
    }
%}.
    ^ super indexOf:aCharacter

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
    "
!

indexOf:aCharacter startingAt:start
    "return the index of the first occurrence of the argument, aCharacter
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int index;
    REGISTER unsigned byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#else
    unsigned char c;
#endif
    int len;
    OBJ cls;

    if (__isSmallInteger(start)) {
	if (__isCharacter(aCharacter)) {
	    byteValue = __intVal(_characterVal(aCharacter));
	    index = __intVal(start);
	    if (index <= 0)
		index = 1;
	    if ((cls = __qClass(self)) != String)
		index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	    len = __stringSize(self);
	    if (index <= len) {
		cp = __stringVal(self) + index - 1;

		if (byteValue == 0) {
		    index += strlen(cp);
		    if (index > len) {
			index = 0;
		    } 
		    RETURN ( __MKSMALLINT(index) );
		}

#ifdef FAST_STRCHR
		cp = (unsigned char *) strchr(cp, byteValue);
		if (cp) {
		    RETURN ( __MKSMALLINT(cp - __stringVal(self) + 1) );
		}
#else
		while (c = *cp++) {

		    if (c == byteValue) {
			RETURN ( __MKSMALLINT(index) );
		    }
		    index++;
		}
#endif
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
%}.
    ^ super indexOf:aCharacter startingAt:start

    "
     'hello world' indexOf:$l startingAt:1 
     'hello world' indexOf:$l startingAt:5  
    "
!

indexOfAny:aCollectionOfCharacters startingAt:start
    "return the index of the first occurrence of any character in aCollectionOfCharacters,
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed if aCollectionOfCharacters is a string."

%{  /* NOCONTEXT */

    REGISTER unsigned char *ccp;
    REGISTER unsigned char *cp;
    REGISTER int index;
    unsigned char *matchP;
    unsigned char c;
    int len;
    OBJ cls;

    if (__isSmallInteger(start)
     && __isString(aCollectionOfCharacters)) {
	matchP = __stringVal(aCollectionOfCharacters);
	index = __intVal(start);
	if (index <= 0)
	    index = 1;
	if ((cls = __qClass(self)) != String)
	    index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	len = __stringSize(self);
	if (index <= len) {
	    cp = __stringVal(self) + index - 1;
	    while (c = *cp++) {
		for (ccp = matchP; *ccp ; ccp++) {
		    if (*ccp == c) {
			RETURN ( __MKSMALLINT(index) );
		    }
		}
		index++;
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
%}.
    "/
    "/ fallback: 1st argument not a string or error
    "/
    ^ super indexOfAny:aCollectionOfCharacters startingAt:start

    "
     'hello world' indexOfAny:'eoa' startingAt:1   
     'hello world' indexOfAny:'eoa' startingAt:6 
     'hello world' indexOfAny:'AOE' startingAt:1 
    "
!

indexOfControlCharacterStartingAt:start
    "return the index of the next control character;
     that is a character with asciiValue < 32.
     Return 0 if none is found."

%{  /* NOCONTEXT */

#ifndef NON_ASCII       /* i.e. not EBCDIC ;-) */
    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    REGISTER unsigned char *cpEnd;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
	index = 1;
    }
    if ((cls = __qClass(self)) != String)
	index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    cpEnd = __stringVal(self) + len;
    cp = __stringVal(self) + index - 1;
    if (cp < cpEnd) {
	while (cp < cpEnd) {
	    if (*cp++ < ' ') {
		RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
	    }
	}
	RETURN ( __MKSMALLINT(0) );
    }
#endif
%}.
    ^ super indexOfControlCharacterStartingAt:start

    "
     'hello world'             indexOfControlCharacterStartingAt:1 
     'hello world\foo' withCRs indexOfControlCharacterStartingAt:1 
     '1\' withCRs indexOfControlCharacterStartingAt:1 
     '1\' withCRs indexOfControlCharacterStartingAt:2 
    "
!

indexOfNonSeparatorStartingAt:start
    "return the index of the next non-whiteSpace character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
        index = 1;
    }
    if ((cls = __qClass(self)) != String)
        index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    if (index > len) {
        RETURN ( __MKSMALLINT(0) );
    }
    cp = __stringVal(self) + index - 1;
    while (c = *cp++) {
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
        if (c > ' ')
#endif
        if ((c != ' ') && (c != '\t') && (c != '\n')
         && (c != '\r') && (c != '\f')) {
            RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
        }
    }
    RETURN ( __MKSMALLINT(0) );
%}.
    ^ super indexOfNonSeparatorStartingAt:start

    "
     'hello world' indexOfNonWhiteSpaceStartingAt:3 
     'hello world' indexOfNonWhiteSpaceStartingAt:7 
    "
!

indexOfSeparatorStartingAt:start
    "return the index of the next separator character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char c;
    int len, index;
    OBJ cls;

    index = __intVal(start);
    if (index <= 0) {
        index = 1;
    }
    if ((cls = __qClass(self)) != String)
        index += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    len = __stringSize(self);
    if (index > len) {
        RETURN ( __MKSMALLINT(0) );
    }
    cp = __stringVal(self) + index - 1;
    while (c = *cp++) {
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
        if (c <= ' ')
#endif
        if ((c == ' ') || (c == '\t') || (c == '\n')
         || (c == '\r') || (c == '\f')) {
            RETURN ( __MKSMALLINT(cp - __stringVal(self)) );
        }
    }
    RETURN ( __MKSMALLINT(0) );
%}.
    ^ super indexOfSeparatorStartingAt:start

    "
     'hello world' indexOfSeparatorStartingAt:3 
     'hello world' indexOfSeparatorStartingAt:7 
    "
!

occurrencesOf:aCharacter
    "count the occurrences of the argument, aCharacter in myself
      - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned byteValue;
    REGISTER int count;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	count = 0;
	byteValue = __intVal(_characterVal(aCharacter));
	cp = __stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
	while (*cp) {
	    if (*cp++ == byteValue) count++;
	}
	RETURN ( __MKSMALLINT(count) );
    }
%}.
    ^ super occurrencesOf:aCharacter

    "
     'hello world' occurrencesOf:$a
     'hello world' occurrencesOf:$w
     'hello world' occurrencesOf:$l 
     'hello world' occurrencesOf:$x  
     'hello world' occurrencesOf:1 
    "
! !

!String methodsFor:'comparing'!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Use sameAs: to compare with case ignored."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;
    INT addrDelta;

    if (s == self) {
	RETURN ( true );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( false );
    }

    cls = __qClass(s);
    myCls = __qClass(self);

    if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	cp2 = __stringVal(s);
	l2 = __stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	cp1 = __stringVal(self);
	l1 = __stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (myCls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	if (l1 != l2) {
	    RETURN ( false );
	}
#ifdef FAST_STRNCMP
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? true : false );
#else
	addrDelta = cp2 - cp1;
# ifdef UNROLL_LOOPS
	while (l1 >= (sizeof(unsigned INT)*4)) {
	    if (((unsigned INT *)cp1)[0] != ((unsigned INT *)(cp1+addrDelta))[0]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[1] != ((unsigned INT *)(cp1+addrDelta))[1]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[2] != ((unsigned INT *)(cp1+addrDelta))[2]) {
		RETURN (false);
	    }
	    if (((unsigned INT *)cp1)[3] != ((unsigned INT *)(cp1+addrDelta))[3]) {
		RETURN (false);
	    }
	    l1 -= (sizeof(unsigned INT) * 4);
	    cp1 += (sizeof(unsigned INT) * 4);
	}
# endif /* UNROLL_LOOPS */
	while (l1 >= sizeof(unsigned INT)) {
	    if (*((unsigned INT *)cp1) != *((unsigned INT *)(cp1+addrDelta))) {
		RETURN (false);
	    }
	    l1 -= sizeof(unsigned INT);
	    cp1 += sizeof(unsigned INT);
	}
	if (l1 >= sizeof(unsigned short)) {
	    if (*((unsigned short *)cp1) != *((unsigned short *)(cp1+addrDelta))) {
		RETURN (false);
	    }
	    l1 -= sizeof(unsigned short);
	    cp1 += sizeof(unsigned short);
	}
	while (l1) {
	    if (*cp1 != *(cp1+addrDelta)) {
		RETURN (false);
	    }
	    l1--;
	    cp1++;
	}

	RETURN (true);
#endif
    }
%}.
    ^ super = aString

    "
     'foo' = 'Foo' 
     'foo' sameAs: 'Foo' 
    "
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     No national variants are honored; use after: for this.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true. 
     This may change."

%{  /* NOCONTEXT */

    int len1, len2, cmp;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;

    if (__isNonNilObject(s)) {
	cls = __qClass(s);
	myCls = __qClass(self);

	if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	    cp2 = __stringVal(s);
	    len2 = __stringSize(s);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (cls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

		cp2 += n;
		len2 -= n;
	    }

	    cp1 = __stringVal(self);
	    len1 = __stringSize(self);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (myCls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

		cp1 += n;
		len1 -= n;
	    }

	    if (len1 <= len2)
		cmp = strncmp(cp1, cp2, len1);
	    else
		cmp = strncmp(cp1, cp2, len2);

	    if (cmp > 0) {
		RETURN ( true );
	    }
	    if ((cmp == 0) && (len1 > len2)) {
		RETURN ( true );
	    }
	    RETURN ( false );
	}
    }
%}.
    ^ super > aString
!

after:aString
    "Compare the receiver with the argument and return true if the
     receiver should come after the argument in a sorted list. 
     Otherwise return false.
     The comparison is language specific, depending on the value of
     LC_COLLATE, which is initialized from the environment.

     STUPID:
	#after has a completely different meaning in SeqColl ..."

    ^ (self compareCollatingWith:aString) > 0

    "
     'max' > 'mäx'       
     'man' > 'mäx'      
     'max' after:'mäx'
     'man' after:'mäx' 
    "

    "Modified: 10.5.1996 / 14:02:45 / cg"
!

compareCollatingWith:aString
    "Compare the receiver with the argument and return 1 if the receiver is
     greater, 0 if equal and -1 if less than the argument in a sorted list. 
     The comparison is language specific, depending on the value of
     LC_COLLATE, which is in the shell environment."

%{  /* NOCONTEXT */

    int cmp;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls;
    OBJ myCls;

    if (__isNonNilObject(s)) {
	cls = __qClass(s);
	myCls = __qClass(self);

	if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	    cp1 = __stringVal(self);

	    /*
	     * care for instances of subclasses ...
	     */
	    if (myCls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

		cp1 += n;
	    }

	    cp2 = __stringVal(s);
	    /*
	     * care for instances of subclasses ...
	     */
	    if (cls != String) {
		int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

		cp2 += n;
	    }

#ifdef HAS_STRCOLL
	    cmp = strcoll(cp1, cp2);
#else
	    cmp = strcmp(cp1, cp2);
#endif

	    if (cmp > 0) {
		RETURN ( __MKSMALLINT(1) );
	    }
	    if (cmp < 0) {
		RETURN ( __MKSMALLINT(-1) );
	    }
	    RETURN ( __MKSMALLINT(0) );
	}
    }
%}.
    "
     currently, this operation is only defined for strings, symbols
     and subclasses.
    "
    ^ self primitiveFailed
!

~= aString
    "Compare the receiver with the argument and return true if the
     receiver is not equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Actually, there is no need to redefine that method here,
     the default (= not as inherited) works ok.
     However, this may be heavily used and the redefinition saves an
     extra message send."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    unsigned char *cp1, *cp2;
    OBJ cls, myCls;
    INT addrDelta;

    if (s == self) {
	RETURN ( false );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( true );
    }

    cls = __qClass(s);
    myCls = __qClass(self);

    if ((cls == String) || (cls == Symbol) || (cls == myCls)) {
	cp1 = __stringVal(self);
	l1 = __stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (myCls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(myCls)->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	cp2 = __stringVal(s);
	l2 = __stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	if (l1 != l2) {
	    RETURN ( true );
	}

#ifdef FAST_STRNCMP
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? false : true );
#else
	addrDelta = cp2 - cp1;
# ifdef UNROLL_LOOPS
	while (l1 >= (sizeof(unsigned INT)*4)) {
	    if (((unsigned INT *)cp1)[0] != ((unsigned INT *)(cp1+addrDelta))[0]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[1] != ((unsigned INT *)(cp1+addrDelta))[1]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[2] != ((unsigned INT *)(cp1+addrDelta))[2]) {
		RETURN (true);
	    }
	    if (((unsigned INT *)cp1)[3] != ((unsigned INT *)(cp1+addrDelta))[3]) {
		RETURN (true);
	    }
	    l1 -= (sizeof(unsigned INT) * 4);
	    cp1 += (sizeof(unsigned INT) * 4);
	}
# endif /* UNROLL_LOOPS */
	while (l1 >= sizeof(unsigned INT)) {
	    if (*((unsigned INT *)cp1) != *((unsigned INT *)(cp1+addrDelta))) {
		RETURN (true);
	    }
	    l1 -= sizeof(unsigned INT);
	    cp1 += sizeof(unsigned INT);
	}
	if (l1 >= sizeof(unsigned short)) {
	    if (*((unsigned short *)cp1) != *((unsigned short *)(cp1+addrDelta))) {
		RETURN (true);
	    }
	    l1 -= sizeof(unsigned short);
	    cp1 += sizeof(unsigned short);
	}
	while (l1) {
	    if (*cp1 != *(cp1+addrDelta)) {
		RETURN (true);
	    }
	    l1--;
	    cp1++;
	}
	RETURN (false);
#endif
    }
%}.
    ^ super ~= aString
! !

!String methodsFor:'converting'!

asBoldText
    "return self as a bold text"

    ^Text string: self emphasis: #bold
!

asString
    "I am a string"

    ^ self

    "Created: 14.8.1997 / 11:37:03 / stefan"
!

asSymbol
    "return a unique symbol with the name taken from my characters."
%{
    OBJ newSymbol;
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
	indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
	indx = 0;
    }
    newSymbol = __MKSYMBOL(__stringVal(self) + indx, (OBJ *)0);
    if (newSymbol) {
	RETURN ( newSymbol);
    }
%}.
    ^ ObjectMemory allocationFailureSignal raise.

    "
     'hello' asSymbol  
    "
!

asSymbolIfInterned
    "if a symbol with the receivers characters is already known, return it.
     Otherwise, return nil. This can be used to query for an existing
     symbol and is the same as
        self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil]
     but slightly faster, since the symbol lookup operation is only
     performed once."

%{  /* NOCONTEXT */
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
        indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
        indx = 0;
    }
    RETURN ( __SYMBOL_OR_NIL(__stringVal(self) + indx));
%}.
    self primitiveFailed
    "
     'hello' asSymbolIfInterned
     'fooBarBaz' asSymbolIfInterned
    "
!

decodeAsLiteralArray
    "given a literalEncoding in the receiver,
     create & return the corresponding object.
     The inverse operation to #literalArrayEncoding."

    ^ self

    "Created: 25.2.1997 / 19:18:19 / cg"
!

literalArrayEncoding
    "encode myself as an array literal, from which a copy of the receiver
     can be reconstructed with #decodeAsLiteralArray."

    ^ self

    "Modified: 1.9.1995 / 02:25:45 / claus"
    "Modified: 22.4.1996 / 13:00:50 / cg"
!

withTabsExpanded
    "return a string with the characters of the receiver where all tabulator characters
     are expanded into spaces (assuming 8-col tabs).
     Notice: if the receiver does not contain any tabs, it is returned unchanged;
     otherwise a new string is returned.
     This does handle multiline strings.
     Rewritten for speed - because this is very heavily used when reading
     big files in the FileBrowser (and therefore speeds up fileReading considerably)."

%{  /* STACK:700 */
    unsigned char buffer[80*8 + 10];
    unsigned char *srcP, *dstP, *cp0;
    int idx, sz;
    int any = 0;
    OBJ newString;
    char c;

    if (__qClass(self) == String) {
	/*
	 * for small strings (< 80), do it without a prescan ...
	 * the buffer is large enough to even convert a
	 * receiver consisting fully of tabs.
	 */
	if (__stringSize(self) < 80) {
	    idx = 1;
	    for (srcP = __stringVal(self), dstP = buffer; (c = *srcP); srcP++) {
		if (c == '\t') {
		    any = 1;
		    while (idx % 8) {
			idx++;
			*dstP++ = ' ';
		    }
		    idx++;
		    *dstP++ = ' ';
		} else {
		    *dstP++ = c;
		    idx++;
		    if (c == '\n') {
			idx = 1;
		    }
		}
	    }
	    if (! any) RETURN(self);
	    *dstP = '\0';
	    RETURN (__MKSTRING_L(buffer, (dstP-buffer)));
	}
	/*
	 * for large strings, we compute the new size, allocate a new string
	 * and expand it.
	 *
	 * first, scan for size ...
	 */
	idx = 1;
	for (srcP = __stringVal(self), sz = 0; (c = *srcP); srcP++) {
	    if (c == '\t') {
		any = 1;
		while (idx % 8) {
		    idx++;
		    sz++;
		}
		idx++; sz ++;
	    } else {
		sz++; idx++;
		if (c == '\n') {
		    idx = 1;
		}
	    }
	}
	if (! any) RETURN(self);

	/*
	 * get the string
	 */
	sz = OHDR_SIZE + sz + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;

	    /*
	     * expand
	     */
	    idx = 1;
	    for (srcP = __stringVal(self), dstP = cp0 = __stringVal(newString); (c = *srcP); srcP++) {
		if (c == '\t') {
		    while (idx % 8) {
			idx++;
			*dstP++ = ' ';
		    }
		    idx++;
		    *dstP++ = ' ';
		} else {
		    *dstP++ = c; idx++;
		    if (c == '\n') {
			idx = 1;
		    }
		}
	    }
	    *dstP++ = '\0';
	    RETURN (newString);
	}
    }
%}.
    ^ super withTabsExpanded
! !

!String methodsFor:'copying'!

, aString
    "return the concatenation of myself and the argument, aString as
     a String.
     - reimplemented here for speed"

%{
    int l1, l2, sz;
    REGISTER OBJ s = aString;
    REGISTER OBJ _string = String;
    OBJ myClass, argClass, newString;

    if (__isNonNilObject(s)) {
	myClass = __qClass(self);
	argClass = __qClass(s);
	/*
	 * can do it here if both are Strings/Symbols:
	 */
	if (((myClass == _string) || (myClass == Symbol))
	 && ((argClass == _string) || (argClass == Symbol))) {
	    l1 = __stringSize(self);
	    l2 = __stringSize(s);

	    sz = OHDR_SIZE + l1 + l2 + 1;
	    __qNew(newString, sz);      /* OBJECT ALLOCATION */
	    if (newString != nil) {
		char *cp1, *cp2;
		REGISTER unsigned char *dstp;

		__InstPtr(newString)->o_class = String;
		dstp = __stringVal(newString);
		cp1 = (char *) __stringVal(self);
		cp2 = (char *) __stringVal(aString);

#ifdef bcopy4
		/* knowing that allocation is 4-byte aligned and
		 * size rounded up to next 4-byte, the first copy
		 * can be done word-wise.
		 * that speeds up size-10-string , size-10-string 
		 * by 10% on a P5/200.
		 */
		{
		    int nw = l1 >> 2;

		    if (l1 & 3) nw++;
		    bcopy4(cp1, dstp, nw);
		    dstp += l1;
		}
#else
# ifdef FAST_MEMCPY
		bcopy(cp1, dstp, l1);
		dstp += l1;
# else
#  ifdef FAST_STRCPY
		strncpy(dstp, cp1, l1);
		dstp += l1;
#  else
		while (l1 >= 4) {
		    *(int *)dstp = *(int *)cp1;
		    dstp += 4; cp1 += 4;
		    l1 -= 4;
		}
		while (l1--) *dstp++ = *cp1++;
#  endif
# endif
#endif

#ifdef bcopy4
		if (((INT)dstp & 3) == 0) {
		    int nw = l2 >> 2;

		    if (l2 & 3) nw++;
		    bcopy4(cp2, dstp, nw);
		    *(dstp + l2) = '\0';
		    RETURN ( newString );
		}
#endif
                    
#ifdef FAST_MEMCPY
		bcopy(cp2, dstp, l2+1);
		dstp[l2] = '\0';
#else
# ifdef FAST_STRCPY
		strncpy(dstp, cp2, l2);
		dstp[l2] = '\0';
# else
		while (l2--) *dstp++ = *cp2++;
		*dstp = '\0';
# endif
#endif
		RETURN ( newString );
	    }
	}
    }
%}.
    ^ super , aString
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected"

    |newString|

%{
    int len1, len2, len3, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))) {
	len1 = __stringSize(self);
	len2 = __stringSize(string1);
	len3 = __stringSize(string2);
	sz = OHDR_SIZE + len1 + len2 + len3 + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, len1);
	    bcopy(__stringVal(string1), dstp + len1, len2);
	    bcopy(__stringVal(string2), dstp + len1 + len2, len3+1);
	    *(dstp + len1 + len2 + len3) = '\0';
#else
# ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), len1);
	    strncpy(dstp + len1, __stringVal(string1), len2);
	    strncpy(dstp + len1 + len2, __stringVal(string2), len3);
	    *(dstp + len1 + len2 + len3) = '\0';
# else
	    srcp = __stringVal(self);
	    while (len1--) *dstp++ = *srcp++;
	    srcp = __stringVal(string1);
	    while (len2--) *dstp++ = *srcp++;
	    srcp = __stringVal(string2);
	    while (len3--) *dstp++ = *srcp++;
	    *dstp = '\0';
# endif
#endif
	    RETURN ( newString );
	}
    }
%}.
    ^ super , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected"

    |newString|

%{
    int len1, len2, len3, len4, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))
     && (__isString(string3) || __isSymbol(string3))) {
	len1 = __stringSize(self);
	len2 = __stringSize(string1);
	len3 = __stringSize(string2);
	len4 = __stringSize(string3);
	sz = OHDR_SIZE + len1 + len2 + len3 + len4 + 1;
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	if (newString != nil) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, len1);
	    bcopy(__stringVal(string1), dstp + len1, len2);
	    bcopy(__stringVal(string2), dstp + len1 + len2, len3);
	    bcopy(__stringVal(string3), dstp + len1 + len2 + len3, len4+1);
	    *(dstp + len1 + len2 + len3 + len4) = '\0';
#else
# ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), len1);
	    strncpy(dstp + len1, __stringVal(string1), len2);
	    strncpy(dstp + len1 + len2, __stringVal(string2), len3);
	    strncpy(dstp + len1 + len2 + len3, __stringVal(string3), len4);
	    *(dstp + len1 + len2 + len3 + len4) = '\0';
# else
	    srcp = __stringVal(self);
	    while (len1--) *dstp++ = *srcp++;
	    srcp = __stringVal(string1);
	    while (len2--) *dstp++ = *srcp++;
	    srcp = __stringVal(string2);
	    while (len3--) *dstp++ = *srcp++;
	    srcp = __stringVal(string3);
	    while (len4--) *dstp++ = *srcp++;
	    *dstp = '\0';
# endif
#endif
	    RETURN ( newString );
	}
    }
%}.
    ^ super , string1 , string2 , string3
!

copy
    "return a copy of the receiver"

    (self isMemberOf:String) ifTrue:[
        ^ self copyFrom:1
    ].
    ^ super copy
!

copyFrom:start
    "return the substring from start, anInteger to the end.
     This method will always return a string, even if the receiver 
     is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1, sz;
    OBJ newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__isSmallInteger(start)
     && ((myClass==String) || (myClass==Symbol))) {
	len = __stringSize(self);
	index1 = __intVal(start);
	if (index1 > 0) {
	    if (index1 <= len) {
		count = len - index1 + 1;
		sz = OHDR_SIZE + count + 1;

		__PROTECT_CONTEXT__
		__qNew(newString, sz);  /* OBJECT ALLOCATION */
		__UNPROTECT_CONTEXT__

		if (newString != nil) {
		    __InstPtr(newString)->o_class = String;
		    dstp = __stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(__stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
# ifdef FAST_STRCPY
		    strncpy(dstp, __stringVal(self) + index1 - 1, count);
		    dstp[count] = '\0';
# else
		    srcp = __stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
    }
#endif
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start
!

copyFrom:start to:stop
    "return the substring starting at index start, anInteger and ending
     at stop, anInteger. This method will always return a string, even
     if the receiver is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, sz, index1, index2;
    OBJ newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__bothSmallInteger(start, stop)
     && ((myClass==String) || (myClass==Symbol))) {
	len = __stringSize(self);
	index1 = __intVal(start);
	index2 = __intVal(stop);

	if ((index1 <= index2) && (index1 > 0)) {
	    if (index2 <= len) {
		count = index2 - index1 + 1;
		sz = OHDR_SIZE + count + 1;

		__PROTECT_CONTEXT__
		__qNew(newString, sz);  /* OBJECT ALLOCATION */
		__UNPROTECT_CONTEXT__

		if (newString != nil) {
		    __InstPtr(newString)->o_class = String;
		    dstp = __stringVal(newString);
		    srcp = __stringVal(self) + index1 - 1;
#ifdef bcopy4
		    {
			int nw = count >> 2;

			if (count & 3) {
			    nw++;
			}
			bcopy4(srcp, dstp, nw);
			dstp[count] = '\0';
		    }
#else
# ifdef FAST_MEMCPY
		    bcopy(srcp, dstp, count);
		    dstp[count] = '\0';
# else
#  ifdef FAST_STRCPY
		    strncpy(dstp, srcp, count);
		    dstp[count] = '\0';
#  else
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
#  endif
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
	/*
	 * allow empty copy
	 */
	if (index1 > index2) {
	    __PROTECT_CONTEXT__
	    __qNew(newString, OHDR_SIZE+1);     /* OBJECT ALLOCATION */
	    __UNPROTECT_CONTEXT__
	    if (newString != nil) {
		__InstPtr(newString)->o_class = String;
		(__stringVal(newString))[0] = '\0';
		RETURN ( newString );
	    }
	}
    }
#endif
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start to:stop
!

copyWith:aCharacter
    "return a new string containing the receivers characters
     and the single new character, aCharacter. 
     This is different from concatentation, which expects another string
     as argument, but equivalent to copy-and-addLast.
     Reimplemented here for more speed"

%{  /* NOCONTEXT */

    int count;
    int sz;
    REGISTER unsigned char *dstp;
    OBJ cls, newString;
    OBJ myClass;

    myClass = __qClass(self);

#ifndef NO_PRIM_STRING
    if (__isCharacter(aCharacter)
     && ((myClass==String) || (myClass==Symbol))) {
	count = __stringSize(self);
	sz = OHDR_SIZE + count + 1 + 1;

	__PROTECT_CONTEXT__
	__qNew(newString, sz);  /* OBJECT ALLOCATION */
	__UNPROTECT_CONTEXT__

	if (newString) {
	    __InstPtr(newString)->o_class = String;
	    dstp = __stringVal(newString);

#ifdef bcopy4
	    {
		int nw = count >> 2;
		char *srcp = (char *)__stringVal(self);

		if (count & 3) {
		    nw++;
		}
		bcopy4(srcp, dstp, nw);
		dstp += count;
	    }
#else
# ifdef FAST_MEMCPY
	    bcopy(__stringVal(self), dstp, count);
	    dstp += count;
# else
#  ifdef FAST_STRCPY
	    strncpy(dstp, __stringVal(self), count);
	    dstp += count;
#  else
	    {
		REGISTER unsigned char *srcp;

		srcp = __stringVal(self);
		while ((*dstp = *srcp++) != '\0')
		    dstp++;
	    }
#  endif
# endif
# endif
	    *dstp++ = __intVal(_characterVal(aCharacter));
	    *dstp = '\0';
	    RETURN (newString );
	}
    }
#endif
%}.
    "fall back in case of non-character arg;
     will eventually lead to an bad element signal raise"

    ^ super copyWith:aCharacter
!

deepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

deepCopyUsing:aDictionary
    "return a copy of the receiver - reimplemented to be a bit faster"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

shallowCopy
    "return a copy of the receiver"

    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super shallowCopy
!

simpleDeepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
! !

!String methodsFor:'filling & replacing'!

atAllPut:aCharacter
    "replace all elements with aCharacter
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#ifndef FAST_MEMSET
    REGISTER unsigned char *dst;
    REGISTER int l;
#endif
    REGISTER int byteValue;

    if (__isCharacter(aCharacter) && __isString(self)) {
	byteValue = __intVal(_characterVal(aCharacter));

#ifdef FAST_MEMSET 
	memset(__stringVal(self), byteValue, __stringSize(self));
#else
	{
	    INT v;

	    v = (byteValue << 8) | byteValue;
	    v = (v << 16) | v;

	    dst = __stringVal(self);
	    l = __stringSize(self);

# ifdef FAST_MEMSET4 /* sorry intel: your stosd instruction is slower ... */
	    memset4(dst, v, l>>2);
	    l = l & 3;
# else
#  ifdef UINT64
	    {
		UINT64 v64;

		v64 = v;
		v64 = (v64 << 32) | v;
		while (l >= 8) {
		    ((UINT64 *)dst)[0] = v64;
		    dst += 8;
		    l -= 8;
		}
	    }
#  else /* no UINT64 */
	    while (l >= 16) {
		((int *)dst)[0] = v;
		((int *)dst)[1] = v;
		((int *)dst)[2] = v;
		((int *)dst)[3] = v;
		dst += 16;
		l -= 16;
	    }
	    if (l >= 8) {
		((int *)dst)[0] = v;
		((int *)dst)[1] = v;
		dst += 8;
		l -= 8;
	    }
	    if (l >= 4) {
		((int *)dst)[0] = v;
		dst += 4;
		l -= 4;
	    }
#   if 0
	    if (l >= 2) {
		((short *)dst)[0] = v;
		dst += 2;
		l -= 2;
	    }
#   endif

#  endif
# endif
	}

	/*
	 * remaining bytes
	 */
	while (l--)
	    *dst++ = byteValue;

#endif /* no FAST_MEMSET */

	RETURN ( self );
    }
%}.
    ^ super atAllPut:aCharacter

    "
     (String new:10) atAllPut:$*      
     String new:10 withAll:$*     
    "
!

replaceAll:oldCharacter with:newCharacter
    "replace all oldCharacters by newCharacter in the receiver.

     Notice: This operation modifies the receiver, NOT a copy;
     therefore the change may affect all others referencing the receiver."

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned oldVal, newVal;
    unsigned char c, cNext;

    if (__isCharacter(oldCharacter)
     && __isCharacter(newCharacter)
     && __isString(self)) {
	srcp = (unsigned char *)__stringVal(self);
	oldVal = __intVal(_characterVal(oldCharacter));
	newVal = __intVal(_characterVal(newCharacter));

	cNext = *srcp;
	while ((c = cNext) != '\0') {
	    cNext = srcp[1];
	    if (c == oldVal)
		*srcp = newVal;
	    srcp++;
	}
	RETURN ( self );
    }
%}.
    ^ super replaceAll:oldCharacter with:newCharacter

    "
     'helloWorld' copy replaceAll:$o with:$O   
     'helloWorld' copy replaceAll:$d with:$*   
     'helloWorld' copy replaceAll:$h with:$*   
    "
!

replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Return the receiver.

     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp, *dstp;
    REGISTER int count;
    int len, index1, index2;
    int repLen, repIndex;

#ifndef NO_PRIM_STRING
    if ((__isString(aString) || __isSymbol(aString))
     && __isString(self)
     && __bothSmallInteger(start, stop)) {
	len = __stringSize(self);
	index1 = __intVal(start);
	index2 = __intVal(stop);
	count = index2 - index1 + 1;
	if (count <= 0) {
	     RETURN (self);
	}
	if ((index2 <= len) && (index1 > 0)) {
	    repLen = __stringSize(aString);
	    repIndex = __intVal(repStart);
	    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
		srcp = __stringVal(aString) + repIndex - 1;
		dstp = __stringVal(self) + index1 - 1;
		if (aString == self) {
		    /* take care of overlapping copy */
		    if (srcp < dstp) {
			/* must do a reverse copy */
			srcp += count;
			dstp += count;
			while (count-- > 0) {
			    *--dstp = *--srcp;
			}
			RETURN (self);
		    }
		}
#ifdef bcopy4
		/* copy quadbytes if pointers are aligned */
		/* 
		 * no sizeof(int) here please -
		 * - bcopy4 (if defined) copies 4-bytes on ALL machines 
		 */
		if ((count > 12)
		 && (((unsigned INT)srcp & 3) == 0)
		 && (((unsigned INT)dstp & 3) == 0)) {
		    int n;

		    n = count >> 2;        /* make it quads */
		    bcopy4(srcp, dstp, n);
		    n <<= 2;               /* back to chars */
		    dstp += n;
		    srcp += n;
		    count -= n;
		}
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
#else
# ifdef FAST_MEMCPY
		bcopy(srcp, dstp, count);
# else
		/* copy longs if pointers are aligned */
		if ((((unsigned INT)srcp & (sizeof(INT)-1)) == 0)
		 && (((unsigned INT)dstp & (sizeof(INT)-1)) == 0)) {
		    while (count >= sizeof(INT)) {
			*((unsigned INT *)dstp) = *((unsigned INT *)srcp);
			dstp += sizeof(INT);
			srcp += sizeof(INT);
			count -= sizeof(INT);
		    }
		}
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
# endif
#endif
		RETURN (self);
	    }
	}
    }
#endif
%}.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart

!

reverse                                                                         
    "in-place reverse the characters of the string."

    "Q: is there a need to redefine it here ?"

%{  /* NOCONTEXT */

    REGISTER char c;
    REGISTER unsigned char *hip, *lowp;

    if (__isString(self)) {
	lowp = __stringVal(self);
	hip = lowp + __stringSize(self) - 1;
	while (lowp < hip) {
	    c = *lowp;
	    *lowp = *hip;
	    *hip = c;
	    lowp++;
	    hip--;
	}
	RETURN ( self );
    }
%}.
    ^ super reverse
!

withoutSeparators
    "return a string containing the chars of myself 
     without leading and trailing whitespace.
     If there is no whitespace, the receiver is returned.
     Notice, this is different from String>>withoutSpaces."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    startIndex := 0.

%{
    REGISTER unsigned char *cp;
    REGISTER unsigned char *ep;
    REGISTER unsigned char c;
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *ep0;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = __stringVal(self);

	/*
	 * find first non-whiteSpace from beginning
	 */
#ifndef NON_ASCII
# ifdef UINT64
	while (*((UINT64 *)cp) == 0x2020202020202020L) {
	    cp += 8;
	}
# endif
	while (*((unsigned *)cp) == 0x20202020) {
	    cp += 4;
	}
#endif
	while ((c = *cp)
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	 && (c <= ' ')
#endif
	 && ((c == ' ') || (c == '\n') || (c == '\t')
			|| (c == '\r') || (c == '\f'))
	) {
	    cp++;
	}

	/*
	 * find first non-whiteSpace from end
	 */
	ep = ep0 = cp0 + __stringSize(self) - 1;
	while ((ep >= cp) && (*ep == ' ')) ep--;
	c = *ep;
	while ((ep >= cp) &&
#ifndef NON_ASCII
	       (c <= ' ') &&
#endif
	       ((c == ' ') || (c == '\n') || (c == '\t')
			   || (c == '\r') || (c == '\f'))) {
	    ep--;
	    c = *ep;
	}

	/*
	 * no whiteSpace ?
	 */
	if ((cp == cp0) && (ep == ep0)) {
	    RETURN(self);
	}

	startIndex = __MKSMALLINT(cp - cp0 + 1);
	endIndex = __MKSMALLINT(ep - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSeparators].

    startIndex > endIndex ifTrue:[^ ''].
    ^ self copyFrom:startIndex to:endIndex

    "
     'hello' withoutSeparators    
     '    hello' withoutSeparators    
     '    hello ' withoutSeparators   
     '    hello  ' withoutSeparators  
     '    hello   ' withoutSeparators 
     '    hello    ' withoutSeparators
     '        ' withoutSeparators       
    "

!

withoutSpaces
    "return a string containing the characters of myself 
     without leading and trailing spaces.
     If there are no spaces, the receiver is returned unchanged.
     Notice, this is different from String>>withoutSeparators."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz blank|

    startIndex := 0.
%{
    REGISTER unsigned char *cp;
    REGISTER unsigned char *ep;
    unsigned char *cp0;
    unsigned char *ep0;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = __stringVal(self);

	/*
	 * find first non-blank from beginning
	 */
#ifndef NON_ASCII
# ifdef UINT64
	while (*((UINT64 *)cp) == 0x2020202020202020L) {
	    cp += 8;
	}
# endif /* UINT64 */
	while (*((unsigned *)cp) == 0x20202020) {
	    cp += 4;
	}
#endif
	while (*cp == ' ') cp++;

	/*
	 * find first non-blank from end
	 */
	ep = ep0 = cp0 + __stringSize(self) - 1;
	while ((ep >= cp) && (*ep == ' ')) ep--;

	/*
	 * no blanks ?
	 */
	if ((cp == cp0) && (ep == ep0)) {
	    RETURN(self);
	}
        
	startIndex = __MKSMALLINT(cp - cp0 + 1);
	endIndex = __MKSMALLINT(ep - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSpaces].

    startIndex > endIndex ifTrue:[^ ''].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[^ self].
    ^ self copyFrom:startIndex to:endIndex

    "
     '    hello' withoutSpaces    
     '    hello ' withoutSpaces   
     '    hello  ' withoutSpaces  
     '    hello   ' withoutSpaces 
     '    hello    ' withoutSpaces
     '        ' withoutSpaces       
    "
! !

!String methodsFor:'printing & storing'!

isLiteral
    "return true, if the receiver can be used as a literal constant in ST syntax
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print the receiver on standard output.
     This method does NOT (by purpose) use the stream classes and 
     will therefore work even in case of emergency."

%{  /* NOCONTEXT */

    if (__qClass(self) == String) {
#ifdef WIN32
	fprintf(stdout, "%s" , __stringVal(self));
#else
/*cg: used to have fputs here, but that seems to be NOT prepared
 * for eintr arriving and thus sometimes sends the string twice ...
 *
 *        fputs(__stringVal(self), stdout);
 */
	fwrite(__stringVal(self), 1, __stringSize(self), stdout);
	fflush(stdout);
#endif
	RETURN (self);
    }
%}.
    ^ super print
!

printCR
    "print the receiver on standard output, followed by a cr.
     This method does NOT (by purpose) use the stream classes and 
     will therefore work even in case of emergency."

%{  /* NOCONTEXT */

    if (__qClass(self) == String) {
#ifdef WIN32
	fprintf(stderr, "%s\n" , __stringVal(self));
#else
/*cg: used to have fputs here, but that seems to be NOT prepared
 * for eintr arriving and thus sometimes sends the string twice ...
 *
 * fputs(__stringVal(self), stdout); fputs("\n", stdout); 
 */
	fwrite(__stringVal(self), 1, __stringSize(self), stdout);
	fwrite("\n", 1, 1, stdout);
	fflush(stdout);
#endif
	RETURN (self);
    }
%}.
    ^ super printCR
!

printfPrintString:formatString
    "non-portable but sometimes useful.
     Return a printed representation of the receiver as specified by formatString, 
     which is defined by printf.
     No checking on buffer overrun is done; the resulting string may not
     be larger than 799 characters.
     This method is NONSTANDARD and may be removed without notice."

%{  /* STACK: 1000 */

    char buffer[800];
    char *cp;
    OBJ s;

    if (__isString(formatString)) {
	/*
	 * actually only needed on sparc: since thisContext is
	 * in a global register, which gets destroyed by printf,
	 * manually save it here - very stupid ...
	 */

	cp = (char *)__stringVal(self);
	if (__qClass(self) != String)
	    cp += __OBJS2BYTES__(__intVal(__ClassInstPtr(__qClass(self))->c_ninstvars));

	__BEGIN_PROTECT_REGISTERS__

	sprintf(buffer, (char *)__stringVal(formatString), cp);

	__END_PROTECT_REGISTERS__

	s = __MKSTRING(buffer COMMA_SND);
	if (s != nil) {
	    RETURN (s);
	}
    }
%}.
    self primitiveFailed

    "
     'hello' printfPrintString:'%%s -> %s'     
     'hello' printfPrintString:'%%10s -> %10s'  
     'hello' printfPrintString:'%%-10s -> %-10s' 
    "
!

storeOn:aStream
    "put the storeString of myself on aStream"

    aStream nextPut: $'.
    (self includes:$') ifTrue:[
	self printWithQuotesDoubledOn:aStream
    ] ifFalse:[
	aStream nextPutAll:self
    ].
    aStream nextPut:$'

    "Modified: / 15.6.1998 / 17:21:51 / cg"
!

storeString
    "return a String for storing myself"

    |s n index|

    n := self occurrencesOf:$'.
    n == 0 ifFalse:[
	s := String new:(n + 2 + self size).
	s at:1 put:$'.
	index := 2.
	self do:[:thisChar |
	    (thisChar == $') ifTrue:[
		s at:index put:thisChar.
		index := index + 1.
	    ].
	    s at:index put:thisChar.
	    index := index + 1.
	].
	s at:index put:$'.
	^ s
    ].
    ^ '''' , self , ''''
! !

!String methodsFor:'queries'!

bitsPerCharacter
    "return the number of bits each character has.
     Here, 8 is returned (storing single byte characters)."

    ^ 8

    "Modified: 20.4.1996 / 23:08:42 / cg"
!

isEmpty
    "return true if the receiver is empty (i.e. if size == 0)
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ cls;

    cls = __qClass(self);
    if ((cls == String) || (cls == Symbol)) {
	RETURN ( (__stringSize(self) == 0) ? true : false);
    }
%}.
    ^ self size == 0
!

knownAsSymbol
    "return true, if there is a symbol with same characters in the
     system.
     Can be used to check for existance of a symbol without creating one"

%{  /* NOCONTEXT */
    OBJ cls;
    int indx;

    cls = __qClass(self);
    if (cls != String) {
        indx = __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));
    } else {
        indx = 0;
    }
    RETURN ( __KNOWNASSYMBOL(__stringVal(self) + indx) );
%}.
    self primitiveFailed

    "
     'hello' knownAsSymbol     
     'fooBarBaz' knownAsSymbol     
    "
!

notEmpty
    "return true if the receiver is not empty (i.e. if size ~~ 0)
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ cls;

    cls = __qClass(self);
    if ((cls == String) || (cls == Symbol)) {
	RETURN ( (__stringSize(self) != 0) ? true : false);
    }
%}.
    ^ self size ~~ 0
! !

!String methodsFor:'regular expression matching'!

asRegex
    "Compile the receiver as a regex matcher. May raise RxParser>>syntaxErrorSignal
    or RxParser>>compilationErrorSignal.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^ Regex::RxParser preferredMatcherClass for: (Regex::RxParser new parse: self)

    "Modified: 3.6.1997 / 11:25:25 / cg"
!

matchesRegex: regexString
    "Test if the receiver matches a regex.  May raise RxParser>>regexErrorSignal or
    child signals.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matches: self

    "Modified: 3.6.1997 / 11:25:30 / cg"
!

prefixMatchesRegex: regexString
    "Test if the receiver's prefix matches a regex. 
    May raise RxParser class>>regexErrorSignal or child signals.
    ||| This is a part of the Regular Expression Matcher package, (c) Vassili Bykov, 1996.
    ||| Refer to `documentation' protocol of RxParser class for details."

    ^regexString asRegex matchesPrefix: self

    "Modified: 3.6.1997 / 11:25:34 / cg"
! !

!String methodsFor:'testing'!

endsWith:aStringOrChar
    "return true, if the receiver ends with something, aStringOrChar."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    unsigned char c;
    REGISTER OBJ slf = self;

    if ((__isString(slf) || __isSymbol(slf))
     && (__isString(aStringOrChar) || __isSymbol(aStringOrChar))) {
	len1 = __qSize(slf);
	len2 = __qSize(aStringOrChar);
	if (len1 < len2) {
	    RETURN ( false );
	}

	src1 = __stringVal(slf) + (len1 - len2);
	src2 = __stringVal(aStringOrChar);
	while (c = *src2++) {
	    if (c != *src1++) {
		RETURN ( false );
	    }
	}
	RETURN (true);
    }
    if (__isCharacter(aStringOrChar)) {
	int val;

	val = __intVal(_characterVal(aStringOrChar));
	if ((unsigned)val <= 255) {
	    len1 = __stringSize(slf);
	    if (len1 > 0) {
		RETURN ( (__stringVal(slf)[len1-1] == val) ? true : false);
	    }
	}
	RETURN ( false );
    }
%}.
    ^ super endsWith:aStringOrChar

    "
     'hello world' endsWith:'world'
     'hello world' endsWith:'earth'
     'hello world' endsWith:$d   
     'hello world' endsWith:$e   
     '' endsWith:$d            
     'hello world' endsWith:#($r $l $d)
     'hello world' endsWith:''   
    "
!

isBlank
    "return true, if the receivers size is 0 or if it contains only spaces.
     Q: should we care for whiteSpace in general here ?"

%{  /* NOCONTEXT */

    REGISTER unsigned char *src;
    REGISTER unsigned char c;
    OBJ cls;

    src = __stringVal(self);
    if ((cls = __qClass(self)) != String)
        src += __OBJS2BYTES__(__intVal(__ClassInstPtr(cls)->c_ninstvars));

#ifndef NON_ASCII
# ifdef UINT64
    while (*((UINT64 *)src) == 0x2020202020202020L) {
        src += 8;
    }
# endif /* UINT64 */

    while (*((unsigned *)src) == 0x20202020) {
        src += 4;
    }
#endif /* ascii */

    while (c = *src++) {
        if (c != ' ') {
            RETURN ( false );
        }
    }
    RETURN ( true );
%}.
    ^ super isBlank
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. arguments are the costs for
     substitution, case-change, insertion and deletion of a character."

%{  /* STACK: 2000 */

    /* 
     * this is very heavy used when correcting errors 
     * (all symbols are searched for best match) - therefore it must be fast
     */
{
    unsigned short *data;
    int l1, l2;
    REGISTER int sz;
    unsigned char *s1, *s2;
    int v1, v2, v3, m;
    REGISTER unsigned short *dp;
    REGISTER int delta;
    REGISTER int j;
    int i;
    int iW, cW, sW, dW;
#   define FASTSIZE 30  /* increase STACK if you increase this ... */
    unsigned short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))
     && __bothSmallInteger(insrtWeight, caseWeight)
     && __bothSmallInteger(substWeight, deleteWeight)) {
	iW = __intVal(insrtWeight);
	cW = __intVal(caseWeight);
	sW = __intVal(substWeight);
	dW = __intVal(deleteWeight);
	s1 = __stringVal(self);
	s2 = __stringVal(aString);
	l1 = strlen(s1);
	l2 = strlen(s2);

	sz = (l1 < l2) ? l2 : l1;
	delta = sz + 1;
	if (sz <= FASTSIZE) {
	    data = fastData;
	} else {
	    /* add ifdef ALLOCA here ... */
	    data = (unsigned short *)malloc(delta * delta * sizeof(short));
	    if (! data) goto mallocFailed;
	}

	data[0] = 0;
	dp = data+1;
	for (j=1, dp=data+1; j<=sz; j++, dp++)
	    *dp = *(dp-1) + iW;

	for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
	    *dp = *(dp-delta) + dW;

	for (i=1; i<=l1; i++) {
	    for (j=1; j<=l2; j++) {
		dp = data + (i*delta) + j;
		if (s1[i] != s2[j]) {
		    if (tolower(s1[i]) == tolower(s2[j])) {
			m = cW;
		    } else {
			m = sW;
		    }
		} else
		    m = 0;

		v2 = *(dp - 1) + iW;
		v3 = *(dp - delta) + dW;
		v1 = *(dp - delta - 1) + m;
		if (v1 < v2)
		    if (v1 < v3)
			m = v1;
		    else
			m = v3;
		else
		    if (v2 < v3)
			m = v2;
		    else
			m = v3;
		*dp = m;
	    }
	}
	m = data[l1 * delta + l2];
	if (sz > FASTSIZE) 
	    free(data);
	RETURN ( __MKSMALLINT(m) );
    }
mallocFailed: ;
}
%}.

    ^ super levenshteinTo:aString 
			s:substWeight c:caseWeight 
			i:insrtWeight d:deleteWeight

    "'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'"
!

startsWith:aStringOrChar
    "return true, if the receiver starts with something, aStringOrChar.
     If the argument is empty, true is returned."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    unsigned char c;
    REGISTER OBJ slf = self;

    if (((__qClass(slf)==String) || (__qClass(slf)==Symbol))
     && __isNonNilObject(aStringOrChar)
     && ((__qClass(aStringOrChar)==String) || (__qClass(aStringOrChar)==Symbol))) {
	src1 = __stringVal(slf);
	src2 = __stringVal(aStringOrChar);

	if (src1[0] != src2[0]) {
	    if (__qSize(aStringOrChar) == (OHDR_SIZE+1) /* 1 for the 0-byte */) {
		RETURN (true);
	    }
	    RETURN ( false );
	}

	len1 = __qSize(slf);
	len2 = __qSize(aStringOrChar);
	if (len1 < len2) {
	    RETURN ( false );
	}

#ifdef UINT64
	while (len2 > (OHDR_SIZE+sizeof(UINT64))) {
	    if ( ((UINT64 *)src1)[0] != ((UINT64 *)src2)[0] ) {
		RETURN (false);
	    }
	    len2 -= sizeof(UINT64);
	    src1 += sizeof(UINT64);
	    src2 += sizeof(UINT64);
	}
#else
# ifdef UNROLL_LOOPS
	while (len2 > (OHDR_SIZE+sizeof(INT)*4)) {
	    if ( ((unsigned INT *)src1)[0] != ((unsigned INT *)src2)[0]) {
		RETURN (false);
	    }
	    if ( ((unsigned INT *)src1)[1] != ((unsigned INT *)src2)[1]) {
		RETURN (false);
	    }
	    if ( ((unsigned INT *)src1)[2] != ((unsigned INT *)src2)[2]) {
		RETURN (false);
	    }
	    if ( ((unsigned INT *)src1)[3] != ((unsigned INT *)src2)[3]) {
		RETURN (false);
	    }
	    len2 -= sizeof(INT)*4;
	    src1 += sizeof(INT)*4;
	    src2 += sizeof(INT)*4;
	}
# endif /* UNROLL_LOOPS */
#endif /* UINT64 */

	while (len2 > (OHDR_SIZE+sizeof(INT))) {
	    if ( ((unsigned INT *)src1)[0] != ((unsigned INT *)src2)[0]) {
		RETURN (false);
	    }
	    len2 -= sizeof(INT);
	    src1 += sizeof(INT);
	    src2 += sizeof(INT);
	}

	while (c = *src2++) {
	    if (c != *src1) {
		RETURN ( false );
	    }
	    src1++;
	}
	RETURN (true);
    }
    if (__isCharacter(aStringOrChar)) {
	int val;

	val = __intVal(_characterVal(aStringOrChar));
	if ((unsigned)val <= 255) {
	    len1 = __stringSize(slf);
	    if (len1 > 0) {
		RETURN ( (__stringVal(slf)[0] == val) ? true : false);
	    }
	}
	RETURN ( false );
    }
%}.
    ^ super startsWith:aStringOrChar

    "
     'hello world' startsWith:'hello'
     'hello world' startsWith:'hella'
     'hello world' startsWith:'hi'      
     'hello world' startsWith:$h   
     'hello world' startsWith:$H   
     'hello world' startsWith:(Character value:16rFF00)   
     'hello world' startsWith:60                          
     'hello world' startsWith:#($h $e $l)
     'hello world' startsWith:''   
    "
! !

!String methodsFor:'tracing'!

traceInto:aRequestor level:level from:referrer
    "double dispatch into tracer, passing my type implicitely in the selector"

    ^ aRequestor traceString:self level:level from:referrer


! !

!String class methodsFor:'documentation'!

version
    ^ '$Header: /cvs/stx/stx/libbasic/String.st,v 1.152 2000-06-23 08:20:15 cg Exp $'
! !