String.st
author claus
Tue, 08 Aug 1995 02:49:43 +0200
changeset 375 e5019c22f40e
parent 370 20f04d9b371b
child 379 5b5a130ccd09
permissions -rw-r--r--
.

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

CharacterArray subclass:#String
       instanceVariableNames:''
       classVariableNames:''
       poolDictionaries:''
       category:'Collections-Text'
!

String comment:'
COPYRIGHT (c) 1988 by Claus Gittinger
	     All Rights Reserved

$Header: /cvs/stx/stx/libbasic/String.st,v 1.41 1995-08-08 00:49:12 claus Exp $
'!

!String class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

version
"
$Header: /cvs/stx/stx/libbasic/String.st,v 1.41 1995-08-08 00:49:12 claus Exp $
"
!

documentation
"
    Strings are ByteArrays storing Characters.

    Strings are kind of kludgy: to allow for easy handling by c-functions,
    there is always one 0-byte added at the end, which is not counted
    in size. also, the at:put: method does not allow for storing 0-bytes.
    (to do this, the basicAt:put: and basicNew: methods are redefined)

    You cannot add any instvars to String, since the the run time system & compiler
    creates literal strings and knows that strings have no named instvars.

    If you really need strings with instVars, you have to create a subclass 
    of String (the access functions defined here can handle this).
    A little warning though: not all smalltalk systems allow subclassing String,
    so your program may become unportable if you do so.
"
! !

!String primitiveDefinitions!

%{
#include <stdio.h>
#include <ctype.h>

#ifdef LINUX
# define __STRINGDEFS__
# include <linuxIntern.h>
#else
  extern char *strchr();
#endif

/*
 * old st/x creates strings with spaces in it;
 * new st/x will fill it with zeros (for st-80 compatibility)
 * the define below sets old behavior.
 */
#define INITIALIZE_WITH_SPACE
%}
! !

!String class methodsFor:'queries'!

isBuiltInClass
    "this class is known by the run-time-system"

    ^ self == String
! !

!String class methodsFor:'instance creation'!

new:n
    ^ self basicNew:n
!

basicNew:anInteger
    "return a new empty string with anInteger characters"

%{  /* NOCONTEXT */

    OBJ newString;
    REGISTER int len;
    REGISTER unsigned char *cp;
    REGISTER OBJ *op;
    int nInstVars, instsize;

    if (__isSmallInteger(anInteger)) {
	len = _intVal(anInteger);
	if (len >= 0) {
	    if (self == String) {
		instsize = OHDR_SIZE + len + 1;
		if (_CanDoQuickNew(instsize)) {
		    /*
		     * the most common case
		     */
		    _qCheckedNew(newString, instsize);
		    _InstPtr(newString)->o_class = self;
		    __qSTORE(newString, self);

		    cp = _stringVal(newString);
#if defined(memset4) && !defined(NON_ASCII)
		    instsize = len >> 2;
		    if (len & 3) instsize++;
		    memset4(cp, 0x20202020, instsize);
		    *(cp + len) = '\0';
#else
# ifdef FAST_MEMSET
		    memset(cp, ' ', len);
		    *(cp + len) = '\0';
# else
		    while (len >= 8) {
#  ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
			((int *)cp)[0] = 0x20202020;
			((int *)cp)[1] = 0x20202020;
#  else
			cp[0] = cp[1] = cp[2] = cp[3] = ' ';
			cp[4] = cp[5] = cp[6] = cp[7] = ' ';
#  endif
			cp += 8; 
			len -= 8;
		    }
		    while (len--)
			*cp++ = ' ';
		    *cp = '\0';
# endif
#endif
		    RETURN (newString);
		}
		nInstVars = 0;
	    } else {
		nInstVars = _intVal(_ClassInstPtr(self)->c_ninstvars);
		instsize = OHDR_SIZE + __OBJS2BYTES__(nInstVars) + len + 1;
	    }

	    PROTECT_CONTEXT
	    _qNew(newString, instsize, SENDER);
	    UNPROTECT_CONTEXT
	    if (newString == nil) goto fail;
	    _InstPtr(newString)->o_class = self;
	    __qSTORE(newString, self);

	    if (nInstVars) {
#if defined(memset4)
		memset4(_InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
		/*
		 * knowing that nil is 0
		 */
		memset(_InstPtr(newString)->i_instvars, 0, __OBJS2BYTES__(nInstVars));
# else
		op = _InstPtr(newString)->i_instvars;
		do {
		    *op++ = nil;
		} while (--nInstVars);
# endif
#endif
		cp = _stringVal(newString) + __OBJS2BYTES__(nInstVars);
	    } else {
		cp = _stringVal(newString);
	    }

#ifdef FAST_MEMSET
	    memset(cp, ' ', len);
	    *(cp + len) = '\0';
#else
	    while (len >= 8) {
# ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
		((int *)cp)[0] = 0x20202020;
		((int *)cp)[1] = 0x20202020;
# else
		cp[0] = cp[1] = cp[2] = cp[3] = ' ';
		cp[4] = cp[5] = cp[6] = cp[7] = ' ';
# endif
		cp += 8;
		len -= 8;
	    }
	    while (len--)
		*cp++ = ' ';
	    *cp = '\0';
#endif
	    RETURN (newString);
	}
    }
fail: ;;
%}
.
    "
     invalid argument, or out-of-memory:
     use error handling in superclass
    "
    ^ (super basicNew:anInteger) atAllPut:(Character space)
!

unititializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end"

    ^ self basicNew:anInteger
!

readFrom:aStream onError:exceptionBlock
    "read & return the next String from the (character-)stream aStream;
     skipping all whitespace first; return the value of exceptionBlock,
     if no string can be read. The sequence of characters as read from the 
     stream must be one as stored via storeOn: or storeString."

    |collected char|

    "
     this method is not to be inherited
     (i.e. not ok for subclasses; Symbol, for example)
    "
    self ~~ String ifTrue:[
	^ super readFrom:aStream onError:exceptionBlock
    ].

    "skip whiteSpace"
    aStream skipSeparators.
    (aStream next == $') ifTrue:[
	collected := WriteStream on:(String new).
	[true] whileTrue:[
	    aStream atEnd ifTrue:[
		"/ mhmh - reached the end without a closing quote
		"/ looks like an error to me ...
		^ exceptionBlock value
	    ].
	    char := aStream next.
	    char == $' ifTrue:[
		"/ look for another quote
		aStream peek == $' ifFalse:[
		    ^ collected contents
		].
		aStream next.
	    ].
	    collected nextPut:char
	]
    ].
    ^ exceptionBlock value

    "
     String readFrom:('''hello world''' readStream) 
     String readFrom:('''hello '''' world''' readStream) 
     String readFrom:('1 ''hello'' ' readStream)
     String readFrom:('1 ''hello'' ' readStream) onError:['foobar']  
    "  
! !

!String methodsFor:'accessing'!

size
    "return the number of characters in myself.
     Reimplemented here to avoid the additional size->basicSize send
     (which we can do here, since size is obviously not redefined in a subclass).
     This method is the same as basicSize."

%{  /* NOCONTEXT */
    REGISTER OBJ cls;

    cls = __qClass(self);
    if (cls == String) {
	RETURN ( _MKSMALLINT(_stringSize(self)) );
    }
    RETURN ( _MKSMALLINT(_stringSize(self)
			 - __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars))));
%}
!

basicSize
    "return the number of characters in myself.
     Redefined here to exclude the 0-byte at the end."

%{  /* NOCONTEXT */
    REGISTER OBJ cls;

    cls = __qClass(self);
    if (cls == String) {
	RETURN ( _MKSMALLINT(_stringSize(self)) );
    }
    RETURN ( _MKSMALLINT(_stringSize(self)
			 - __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars))));
%}
!

at:index
    "return the character at position index, an Integer.
     Reimplemented here to avoid the additional at:->basicAt: send
     (which we can do here, since at: is obviously not redefined in a subclass).
     This method is the same as at:."

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ cls;

    if (__isSmallInteger(index)) {
	indx = _intVal(index);
	if (indx > 0) {
	    cls = __qClass(self);
	    if (cls != String)
		indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
	    if (indx <= (_stringSize(self))) {
		RETURN ( _MKCHARACTER(_stringVal(self)[indx-1] & 0xFF) );
	    }
	}
    }
%}.
    ^ self subscriptBoundsError:index
!

basicAt:index
    "return the character at position index, an Integer
     - reimplemented here since we return characters"

%{  /* NOCONTEXT */

    REGISTER int indx;
    REGISTER OBJ cls;

    if (__isSmallInteger(index)) {
	indx = _intVal(index);
	if (indx > 0) {
	    cls = __qClass(self);
	    if (cls != String)
		indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
	    if (indx <= (_stringSize(self))) {
		RETURN ( _MKCHARACTER(_stringVal(self)[indx-1] & 0xFF) );
	    }
	}
    }
%}.
    ^ self subscriptBoundsError:index
!

at:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer.
     Reimplemented here to avoid the additional at:put:->basicAt:put: send
     (but only for Strings, since subclasses may redefine basicAt:put:).
     This method is the same as basicAt:put:."

%{  /* NOCONTEXT */

    REGISTER int value, indx;

    if (__isString(self)) {
	if (__isCharacter(aCharacter)) {
	    value = _intVal(_characterVal(aCharacter));
	    if ((value > 0) 
	     && (value <= 255)
	     && __isSmallInteger(index)) {
		indx = _intVal(index);
		if (indx > 0) {
		    if (indx <= (_stringSize(self))) {
			_stringVal(self)[indx-1] = value;
			RETURN ( aCharacter );
		    }
		}
	    }
	}
    }
%}.
    (self isMemberOf:String) ifFalse:[
	^ super at:index put:aCharacter
    ].

    (aCharacter isMemberOf:Character) ifFalse:[
	"
	 tried to store something which is not a character
	"
	^ self elementNotCharacter
    ].
    (aCharacter asciiValue between:1 and:255) ifFalse:[
	"
	 tried to store a multibyte character
	"
	^ self elementBoundsError
    ].
    "
     invalid index
    "
    ^ self subscriptBoundsError:index
!

basicAt:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer
     - reimplemented here since we store characters
     (but only for Strings, since subclasses may redefine basicAt:put:)."

%{  /* NOCONTEXT */

    REGISTER int value, indx;

    if (__isString(self)) {
	if (__isCharacter(aCharacter)) {
	    value = _intVal(_characterVal(aCharacter));
	    if ((value > 0) 
	     && (value <= 255)
	     && __isSmallInteger(index)) {
		indx = _intVal(index);
		if (indx > 0) {
		    if (indx <= (_stringSize(self))) {
			_stringVal(self)[indx-1] = value;
			RETURN ( aCharacter );
		    }
		}
	    }
	}
    }
%}.
    (self isMemberOf:String) ifFalse:[
	^ super basicAt:index put:aCharacter
    ].

    (aCharacter isMemberOf:Character) ifFalse:[
	"
	 tried to store something which is not a character
	"
	^ self elementNotCharacter
    ].
    (aCharacter asciiValue between:1 and:255) ifFalse:[
	"
	 tried to store a multibyte character
	"
	^ self elementBoundsError
    ].
    "
     invalid index
    "
    ^ self subscriptBoundsError:index
! !

!String methodsFor:'converting'!

asSymbol
    "return a unique symbol with the name taken from myself.
     The receiver must be a String, subclass instances are (currently)
     not allowed."
%{
    if (__qClass(self) == String) {
	RETURN ( _MKSYMBOL(_stringVal(self), (OBJ *)0, __context) );
    }
%}.
    self primitiveFailed
!

asSymbolIfInterned
    "if a symbol with the receivers characters is already known, return it.
     Otherwise, return nil. This can be used to query for an existing
     symbol and is the same as
	self knownAsSymbol ifTrue:[self asSymbol] ifFalse:[nil]
     but slightly faster, since the symbol lookup operation is only
     performed once.
     The receiver must be a String, subclass instances are (currently)
     not allowed."

%{  /* NOCONTEXT */
    OBJ __SYMBOL_OR_NIL();

    if (__qClass(self) == String) {
	RETURN ( __SYMBOL_OR_NIL(_stringVal(self)));
    }
%}.
    self primitiveFailed
! !

!String class methodsFor:'binary storage'!

binaryDefinitionFrom:stream manager:manager
    "read a binary representation from stream."

    ^ (stream next:(stream nextNumber: 4)) asString
! !

!String methodsFor:'binary storage'!

storeBinaryOn:stream manager:manager
    "append a binary representation of the receiver onto stream.
     Redefined to store short Strings in a more compact way with a
     special type code."

    |sz "{ Class: SmallInteger }"|

    sz := self size.
    "not, if I am a thingy of a subclass"
    ((self class ~~ String) or:[sz > 255]) ifTrue:[
	^ super storeBinaryOn:stream manager:manager
    ].
    stream nextPut:manager codeForString.
    stream nextPut:sz.
    1 to:sz do:[:index |
	stream nextPut:(self basicAt:index) asciiValue
    ]
!

!String methodsFor:'printing & storing'!

isLiteral
    "return true, if the receiver can be used as a literal
     (i.e. can be used in constant arrays)"

    ^ true
!

print
    "print the receiver on standard output.
     This method does NOT use the stream classes and will therefore work
     even in case of emergency."

%{  /* NOCONTEXT */

    if (__qClass(self) == String) {
	printf("%s", _stringVal(self));
	RETURN (self);
    }
%}
.
    ^ super print
!

printfPrintString:formatString
    "non-portable but sometimes useful.
     Return a printed representation of the receiver as specified by formatString, 
     which is defined by printf.
     No checking on buffer overrun is done; the resulting string may not
     be larger than 799 characters.
     This method is NONSTANDARD and may be removed without notice."

%{  /* STACK: 1000 */

    char buffer[800];
    char *cp;
    OBJ s;

    if (__isString(formatString)) {
#ifdef THISCONTEXT_IN_REGISTER
	/*
	 * actually only needed on sparc: since thisContext is
	 * in a global register, which gets destroyed by printf,
	 * manually save it here - very stupid ...
	 */
	extern OBJ __thisContext__;
	__thisContext__ = __thisContext;
#endif

	cp = (char *)_stringVal(self);
	if (__qClass(self) != String)
	    cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	sprintf(buffer, (char *)_stringVal(formatString), cp);
#ifdef THISCONTEXT_IN_REGISTER
	__thisContext = __thisContext__;
	__thisContext__ = nil;
#endif

	s = _MKSTRING(buffer COMMA_SND);
	if (s != nil) {
	    RETURN (s);
	}
    }
%}.
    self primitiveFailed

    "
     'hello' printfPrintString:'%%s -> %s'     
     'hello' printfPrintString:'%%10s -> %10s'  
     'hello' printfPrintString:'%%-10s -> %-10s' 
    "
!

storeString
    "return a String for storing myself"

    |s n index|

    n := self occurrencesOf:$'.
    n == 0 ifFalse:[
	s := String new:(n + 2 + self size).
	s at:1 put:$'.
	index := 2.
	self do:[:thisChar |
	    (thisChar == $') ifTrue:[
		s at:index put:thisChar.
		index := index + 1.
	    ].
	    s at:index put:thisChar.
	    index := index + 1.
	].
	s at:index put:$'.
	^ s
    ].
    ^ '''' , self , ''''
!

storeOn:aStream
    "put the storeString of myself on aStream"

    aStream nextPut: $'.
    (self includes:$') ifTrue:[
	self do:[:thisChar |
	    (thisChar == $') ifTrue:[aStream nextPut:thisChar].
	    aStream nextPut:thisChar
	]
    ] ifFalse:[
	aStream nextPutAll:self
    ].
    aStream nextPut:$'
! !

!String methodsFor:'comparing'!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false.
     No national variants are honored; use after: for this.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true. 
     This may change."

%{  /* NOCONTEXT */

    int len1, len2, cmp;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;
    OBJ cls;

    if (__isNonNilObject(s)
     && (((cls = __qClass(s)) == String) || (cls == Symbol) || (cls == __qClass(self)))) {
	cp1 = (char *) _stringVal(self);
	len1 = _stringSize(self);

	/*
	 * care for instances of subclasses ...
	 */
	if (__qClass(self) != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	    cp1 += n;
	    len1 -= n;
	}

	cp2 = (char *) _stringVal(s);
	len2 = _stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    len2 -= n;
	}

	if (len1 <= len2)
	    cmp = strncmp(cp1, cp2, len1);
	else
	    cmp = strncmp(cp1, cp2, len2);

	if (cmp > 0) {
	    RETURN ( true );
	}
	if ((cmp == 0) && (len1 > len2)) {
	    RETURN ( true );
	}
	RETURN ( false );
    }
%}.
    ^ super > aString
!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Use sameAs: to compare with case ignored."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;
    OBJ cls;

    if (s == self) {
	RETURN ( true );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( false );
    }

    if (((cls = __qClass(s)) == String) || (cls == Symbol) || (cls == __qClass(self))) {
	cp1 = (char *) _stringVal(self);
	l1 = _stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (__qClass(self) != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	cp2 = (char *) _stringVal(s);
	l2 = _stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	if (l1 != l2) {
	    RETURN ( false );
	}
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? true : false );
    }
%}
.
    ^ super = aString

    "
     'foo' = 'Foo' 
     'foo' sameAs: 'Foo' 
    "
!

~= aString
    "Compare the receiver with the argument and return true if the
     receiver is not equal to the argument. Otherwise return false.
     This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo')"

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;
    OBJ cls;

    if (s == self) {
	RETURN ( false );
    }
    if (! __isNonNilObject(s)) {
	RETURN ( true );
    }
    if (((cls = __qClass(s)) == String) || (cls == Symbol) || (cls == __qClass(self))) {
	cp1 = (char *) _stringVal(self);
	l1 = _stringSize(self);
	/*
	 * care for instances of subclasses ...
	 */
	if (__qClass(self) != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	    cp1 += n;
	    l1 -= n;
	}

	cp2 = (char *) _stringVal(s);
	l2 = _stringSize(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	    l2 -= n;
	}

	if (l1 != l2) {
	    RETURN ( true );
	}
	RETURN ( (strncmp(cp1, cp2, l1) == 0) ? false : true );
    }
%}
.
    ^ super ~= aString
!

after:aString
    "Compare the receiver with the argument and return true if the
     receiver should come after the argument in a sorted list. 
     Otherwise return false.
     The comparison is language specific, depending on the value of
     LC_COLLATE, which is initialized from the environment."

%{  /* NOCONTEXT */

    int cmp;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;
    OBJ cls;

    if (__isNonNilObject(s)
     && (((cls = __qClass(s)) == String) || (cls == Symbol) || (cls == __qClass(self)))) {
	cp1 = (char *) _stringVal(self);

	/*
	 * care for instances of subclasses ...
	 */
	if (__qClass(self) != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

	    cp1 += n;
	}

	cp2 = (char *) _stringVal(s);
	/*
	 * care for instances of subclasses ...
	 */
	if (cls != String) {
	    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

	    cp2 += n;
	}

#ifdef HAS_STRCOLL
	cmp = strcoll(cp1, cp2);
#else
	cmp = strcmp(cp1, cp2);
#endif

	if (cmp > 0) {
	    RETURN ( true );
	}
	RETURN ( false );
    }
%}.
    "
     currently, this operation is only defined for strings, symbols
     and subclasses.
    "
    self primitiveFailed
! !

!String methodsFor:'character searching'!

occurrencesOf:aCharacter
    "count the occurrences of the argument, aCharacter in myself
      - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    REGISTER int count;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	count = 0;
	byteValue = _intVal(_characterVal(aCharacter));
	cp = _stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
	while (*cp) {
	    if (*cp++ == byteValue) count++;
	}
	RETURN ( _MKSMALLINT(count) );
    }
%}.
    ^ 0

    "
     'hello world' occurrencesOf:$a
     'hello world' occurrencesOf:$w
     'hello world' occurrencesOf:$l 
     'hello world' occurrencesOf:$x  
     'hello world' occurrencesOf:1 
    "
!

includes:aCharacter
    "return true if the argument, aCharacter is included in the receiver
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	byteValue = _intVal(_characterVal(aCharacter));
	cp = _stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
	if (cp) {
	    RETURN ( true );
	}
#else
	while (*cp) {
	    if (*cp == byteValue) {
		RETURN ( true );
	    }
	    cp++;
	}
#endif
    }
%}.
    ^ false

    "
     'hello world' includes:$a
     'hello world' includes:$o  
     'hello world' includes:$x  
     'hello world' includes:1    
     'hello world' asTwoByteString includes:$o  
    "
!

includesAny:aCollection
    "return true, if the receiver includes any of the characters in the
     argument, aCollection.
     - redefined for speed if the argument is a String."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned char *matchP;
    OBJ cls;

    if (__isString(aCollection)) {
	matchP = _stringVal(aCollection);
	cp = _stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

	while (*cp) {
	    if (strchr(matchP, *cp)) {
		RETURN ( true );
	    }
	    cp++;
	}
	RETURN ( false );
    }
%}
.
    ^ super includesAny:aCollection

    "
     'hello world' includesAny:'abcd'                      
     'hello world' includesAny:'xyz'                      
     'hello world' includesAny:(Array with:$a with:$b with:$d)   
     'hello world' includesAny:(Array with:$x with:$y)     
     'hello world' includesAny:(Array with:1 with:2)    
    "
!

indexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
#ifdef FAST_STRCHR
    char *strchr();
#else
    REGISTER int byteValue;
    REGISTER int index;
    char c;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
	cp = _stringVal(self);
	if ((cls = __qClass(self)) != String)
	    cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
	cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
	if (cp) {
	    RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
	}
#else
	byteValue = _intVal(_characterVal(aCharacter));
	index = 1;
	while (c = *cp++) {
	    if (c == byteValue) { RETURN ( _MKSMALLINT(index) ); }
	    index++;
	}
#endif
    }
%}.
    "/ cannot include anything but characters ...
    ^ 0

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
    "
!

indexOf:aCharacter startingAt:start
    "return the index of the first occurrence of the argument, aCharacter
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int index, byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#else
    char c;
#endif
    int len;
    OBJ cls;

    if (__isSmallInteger(start)) {
	if (__isCharacter(aCharacter)) {
	    byteValue = _intVal(_characterVal(aCharacter));
	    index = _intVal(start);
	    if (index <= 0)
		index = 1;
	    if ((cls = __qClass(self)) != String)
		index += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
	    len = _stringSize(self);
	    if (index <= len) {
		cp = _stringVal(self) + index - 1;
#ifdef FAST_STRCHR
		cp = (unsigned char *) strchr(cp, byteValue);
		if (cp) {
		    RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
		}
#else
		while (c = *cp++) {
		    if (c == byteValue) {
			RETURN ( _MKSMALLINT(index) );
		    }
		    index++;
		}
#endif
	    }
	}
	RETURN ( _MKSMALLINT(0) );
    }
%}.
    ^ super indexOf:aCharacter startingAt:start

    "
     'hello world' indexOf:$l startingAt:1 
     'hello world' indexOf:$l startingAt:5  
    "
!

indexOfSeparatorStartingAt:start
    "return the index of the next separator character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER char c;
    int len, index;
    OBJ cls;

    index = _intVal(start);
    if (index <= 0) {
	index = 1;
    }
    if ((cls = __qClass(self)) != String)
	index += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
    len = _stringSize(self);
    if (index > len) {
	RETURN ( _MKSMALLINT(0) );
    }
    cp = _stringVal(self) + index - 1;
    while (c = *cp++) {
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	if (c <= ' ')
#endif
	if ((c == ' ') || (c == '\t') || (c == '\n')
	 || (c == '\r') || (c == '\f')) {
	    RETURN ( _MKSMALLINT(cp - _stringVal(self)) );
	}
    }
%}
.
    ^ 0

    "
     'hello world' indexOfSeparatorStartingAt:3 
     'hello world' indexOfSeparatorStartingAt:7 
    "
! !

!String methodsFor:'pattern matching'!

startsWith:aString
    "return true, if the receiver starts with something, aString."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    char c;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))) {
        len1 = __qSize(self);
        len2 = __qSize(aString);
        if (len1 < len2) {
	    RETURN ( false );
        }

        src1 = _stringVal(self);
        src2 = _stringVal(aString);
        while (c = *src2++) {
	    if (c != *src1++) {
	        RETURN ( false );
	    }
	}
	RETURN (true);
    }
%}.
    (aString isMemberOf:Character) ifTrue:[
        self size == 0 ifTrue:[^ false].
        ^ (self at:1) == aString
    ].
    ^ super startsWith:aString

    "
     'hello world' startsWith:'hello'  
     'hello world' startsWith:'hi'      
     'hello world' startsWith:$h
     'hello world' startsWith:#($h $e $l)
    "
!

endsWith:aString
    "return true, if the receiver end with something, aString."

%{  /* NOCONTEXT */

    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    char c;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))) {
        len1 = __qSize(self);
        len2 = __qSize(aString);
        if (len1 < len2) {
	    RETURN ( false );
        }

        src1 = _stringVal(self) + len1 - len2;
        src2 = _stringVal(aString);
        while (c = *src2++) {
	    if (c != *src1++) {
	        RETURN ( false );
	    }
	}
	RETURN (true);
    }
%}.
    (aString isMemberOf:Character) ifTrue:[
	self size == 0 ifTrue:[^ false].
	^ (self at:(self size)) == aString
    ].
    ^ super endsWith:aString

    "
     'hello world' endsWith:'world'
     'hello world' endsWith:'earth'
     'hello world' endsWith:$d
     'hello world' endsWith:#($r $l $d)
    "
! !

!String methodsFor:'testing'!

isBlank
    "return true, if the receivers size is 0 or if it contains only spaces.
     Q: should we care for whiteSpace in general here ?"

%{  /* NOCONTEXT */

    REGISTER unsigned char *src;
    REGISTER unsigned char c;
    OBJ cls;

    src = _stringVal(self);
    if ((cls = __qClass(self)) != String)
	src += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

#ifndef NON_ASCII
    while (*((unsigned *)src) == 0x20202020) {
	src += 4;
    }
#endif
    while (c = *src++)
	if (c != ' ') {
	    RETURN ( false );
	}
%}.
    ^ true
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. arguments are the costs for
     substitution, case-change, insertion and deletion of a character."

%{  /* STACK: 4000 */

    /* 
     * this is very heavy used when correcting errors 
     * (all symbols are searched for best match) - therefore it must be fast
     */
{
    unsigned short *data;
    int l1, l2;
    REGISTER int sz;
    unsigned char *s1, *s2;
    int v1, v2, v3, m;
    REGISTER unsigned short *dp;
    REGISTER int delta;
    REGISTER int j;
    int i;
    int iW, cW, sW, dW;
#   define FASTSIZE 30
    unsigned short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))
     && __bothSmallInteger(insrtWeight, caseWeight)
     && __bothSmallInteger(substWeight, deleteWeight)) {
	iW = _intVal(insrtWeight);
	cW = _intVal(caseWeight);
	sW = _intVal(substWeight);
	dW = _intVal(deleteWeight);
	s1 = _stringVal(self);
	s2 = _stringVal(aString);
	l1 = strlen(s1);
	l2 = strlen(s2);

	sz = (l1 < l2) ? l2 : l1;
	delta = sz + 1;
	if (sz <= FASTSIZE) {
	    data = fastData;
	} else {
	    /* add ifdef ALLOCA here ... */
	    data = (unsigned short *)malloc(delta * delta * sizeof(short));
	    if (! data) goto mallocFailed;
	}

	data[0] = 0;
	dp = data+1;
	for (j=1, dp=data+1; j<=sz; j++, dp++)
	    *dp = *(dp-1) + iW;

	for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
	    *dp = *(dp-delta) + dW;

	for (i=1; i<=l1; i++) {
	    for (j=1; j<=l2; j++) {
		dp = data + (i*delta) + j;
		if (s1[i] != s2[j]) {
		    if (tolower(s1[i]) == tolower(s2[j])) {
			m = cW;
		    } else {
			m = sW;
		    }
		} else
		    m = 0;

		v2 = *(dp - 1) + iW;
		v3 = *(dp - delta) + dW;
		v1 = *(dp - delta - 1) + m;
		if (v1 < v2)
		    if (v1 < v3)
			m = v1;
		    else
			m = v3;
		else
		    if (v2 < v3)
			m = v2;
		    else
			m = v3;
		*dp = m;
	    }
	}
	m = data[l1 * delta + l2];
	if (sz > FASTSIZE) 
	    free(data);
	RETURN ( _MKSMALLINT(m) );
    }
mallocFailed: ;
}
%}.

    ^ super levenshteinTo:aString 
			s:substWeight c:caseWeight 
			i:insrtWeight d:deleteWeight

    "'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'"
! !

!String methodsFor:'copying'!

shallowCopy
    "return a copy of the receiver"

    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super shallowCopy
!

deepCopyUsing:aDictionary
    "return a copy of the receiver - reimplemented to be a bit faster"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

simpleDeepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

deepCopy
    "return a copy of the receiver"

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
	^ self copyFrom:1
    ].
    ^ super deepCopy
!

, aString
    "return the concatenation of myself and the argument, aString as
     a String.
     - reimplemented here for speed"

%{
    int l1, l2, sz;
    char *cp1, *cp2;
    REGISTER unsigned char *dstp;
    REGISTER OBJ s = aString;
    REGISTER OBJ _string = String;
    OBJ myClass, argClass, newString;

    if (__isNonNilObject(s)) {
	myClass = __qClass(self);
	argClass = __qClass(s);
	/*
	 * can do it here if
	 * either same class or both Strings/Symbols:
	 */
	if ((myClass == argClass)
	 || (((myClass == _string) || (myClass == Symbol))
	     && ((argClass == _string) || (argClass == Symbol)))) {
		cp1 = (char *) _stringVal(self);
		l1 = _stringSize(self);
		if (myClass != _string) {
		    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

		    cp1 += n;
		    l1 -= n;
		}

		cp2 = (char *) _stringVal(s);
		l2 = _stringSize(s);
		if (argClass != _string) {
		    int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(s))->c_ninstvars));

		    cp2 += n;
		    l2 -= n;
		}

		sz = OHDR_SIZE + l1 + l2 + 1;
		_qNew(newString, sz, __context);
		if (newString != nil) {
		    _InstPtr(newString)->o_class = String;
		    dstp = _stringVal(newString);
		    /*
		     * refetch in case of a GC
		     */
		    cp1 = (char *) _stringVal(self);
		    cp2 = (char *) _stringVal(aString);
#ifdef FAST_MEMCPY
		    bcopy(cp1, dstp, l1);
		    bcopy(cp2, dstp + l1, l2+1);
#else
# ifdef FAST_STRCPY
		    strcpy(dstp, cp1);
		    strcpy(dstp + l1, cp2);
# else
		    while ((*dstp++ = *cp1++) != '\0') ;
		    dstp--;
		    while ((*dstp++ = *cp2++) != '\0') ;
# endif
#endif
		    RETURN ( newString );
		}
	}
    }
%}
.
    ^ super , aString
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected"

    |newString|
%{
    int len1, len2, len3, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))) {
	len1 = _stringSize(self);
	len2 = _stringSize(string1);
	len3 = _stringSize(string2);
	sz = OHDR_SIZE + len1 + len2 + len3 + 1;
	_qNew(newString, sz, __context);
	if (newString != nil) {
	    _InstPtr(newString)->o_class = String;
	    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(_stringVal(self), dstp, len1);
	    bcopy(_stringVal(string1), dstp + len1, len2);
	    bcopy(_stringVal(string2), dstp + len1 + len2, len3+1);
#else
# ifdef FAST_STRCPY
	    strcpy(dstp, _stringVal(self));
	    strcpy(dstp + len1, _stringVal(string1));
	    strcpy(dstp + len1 + len2, _stringVal(string2));
# else
	    srcp = _stringVal(self);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string1);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string2);
	    while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
	    RETURN ( newString );
	}
    }
%}
.
    ^ super , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected"

    |newString|
%{
    int len1, len2, len3, len4, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))
     && (__isString(string3) || __isSymbol(string3))) {
	len1 = _stringSize(self);
	len2 = _stringSize(string1);
	len3 = _stringSize(string2);
	len4 = _stringSize(string3);
	sz = OHDR_SIZE + len1 + len2 + len3 + len4 + 1;
	_qNew(newString, sz, __context);
	if (newString != nil) {
	    _InstPtr(newString)->o_class = String;
	    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
	    bcopy(_stringVal(self), dstp, len1);
	    bcopy(_stringVal(string1), dstp + len1, len2);
	    bcopy(_stringVal(string2), dstp + len1 + len2, len3);
	    bcopy(_stringVal(string3), dstp + len1 + len2 + len3, len4+1);
#else
# ifdef FAST_STRCPY
	    strcpy(dstp, _stringVal(self));
	    strcpy(dstp + len1, _stringVal(string1));
	    strcpy(dstp + len1 + len2, _stringVal(string2));
	    strcpy(dstp + len1 + len2 + len3, _stringVal(string3));
# else
	    srcp = _stringVal(self);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string1);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string2);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string3);
	    while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
	    RETURN ( newString );
	}
    }
%}
.
    ^ super , string1 , string2 , string3
!

copyWith:aCharacter
    "return a new string containing the receivers characters
     and the single new character, aCharacter. 
     This is different from concatentation, which expects another string
     as argument, but equivalent to copy-and-addLast.
     Reimplemented here for more speed"

%{  /* NOCONTEXT */

    int sz;
    REGISTER unsigned char *dstp;
    int offs;
    OBJ cls, newString;

    if (__isCharacter(aCharacter)) {
	sz = __qSize(self) + 1;
	if ((cls = __qClass(self)) != String) {
	    offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
	    sz -= offs;
	} else
	    offs = 0;

	PROTECT_CONTEXT
	_qNew(newString, sz, SENDER);
	UNPROTECT_CONTEXT
	if (newString) {
	    _InstPtr(newString)->o_class = String;
	    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
	    sz = sz - OHDR_SIZE - 1 - 1;
	    bcopy(_stringVal(self) + offs, dstp, sz);
	    dstp += sz;
#else
# ifdef FAST_STRCPY
	    strcpy(dstp, _stringVal(self) + offs);
	    dstp += sz - OHDR_SIZE - 1 - 1;
# else
	    {
		REGISTER unsigned char *srcp;

		srcp = _stringVal(self) + offs;
		while ((*dstp = *srcp++) != '\0')
		    dstp++;
	    }
# endif
#endif
	    *dstp++ = _intVal(_characterVal(aCharacter));
	    *dstp = '\0';
	    RETURN (newString );
	}
    }
%}.
    "fall back in case of non-character arg;
     will eventually lead to an bad element signal raise"

    ^ super copyWith:aCharacter
!

copyFrom:start to:stop
    "return the substring starting at index start, anInteger and ending
     at stop, anInteger. This method will always return a string, even
     if the receiver is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, sz, index1, index2;
    OBJ newString;

    if (__bothSmallInteger(start, stop)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	index2 = _intVal(stop);

	if ((index1 <= index2) && (index1 > 0)) {
	    if (__qClass(self) != String) {
		int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

		index1 += n;
		index2 += n;
	    }
	    if (index2 <= len) {
		count = index2 - index1 + 1;
		PROTECT_CONTEXT
		sz = OHDR_SIZE + count + 1;
		_qNew(newString, sz, SENDER);
		UNPROTECT_CONTEXT
		if (newString != nil) {
		    _InstPtr(newString)->o_class = String;
		    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(_stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
# ifdef FAST_STRCPY
		    strncpy(dstp, _stringVal(self) + index1 - 1, count);
		    dstp[count] = '\0';
# else
		    srcp = _stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
	/*
	 * allow empty copy
	 */
	if (index1 > index2) {
	    PROTECT_CONTEXT
	    _qNew(newString, OHDR_SIZE+1, SENDER);
	    UNPROTECT_CONTEXT
	    if (newString != nil) {
		_InstPtr(newString)->o_class = String;
		__stringVal(newString)[0] = '\0';
		RETURN ( newString );
	    }
	}
    }
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start to:stop
!

copyFrom:start
    "return the substring from start, anInteger to the end.
     This method will always return a string, even if the receiver 
     is a subclass-instance. This might change if there is a need.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1, sz;
    OBJ newString;

    if (__isSmallInteger(start)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	if (index1 > 0) {
	    if (__qClass(self) != String) {
		int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(__qClass(self))->c_ninstvars));

		index1 += n;
	    }
	    if (index1 <= len) {
		count = len - index1 + 1;
		PROTECT_CONTEXT
		sz = OHDR_SIZE + count + 1;
		_qNew(newString, sz, SENDER);
		UNPROTECT_CONTEXT
		if (newString != nil) {
		    _InstPtr(newString)->o_class = String;
		    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(_stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
# ifdef FAST_STRCPY
		    strncpy(dstp, _stringVal(self) + index1 - 1, count);
		    dstp[count] = '\0';
# else
		    srcp = _stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
# endif
#endif
		    RETURN ( newString );
		}
	    }
	}
    }
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start
! !

!String methodsFor:'filling and replacing'!

replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.

     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp, *dstp;
    REGISTER int count;
    int len, index1, index2;
    int repLen, repIndex;

    if ((__isString(aString) || __isSymbol(aString))
     && __isString(self)
     && __bothSmallInteger(start, stop)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	index2 = _intVal(stop);
	count = index2 - index1 + 1;
	if (count <= 0) {
	     RETURN (self);
	}
	if ((index2 <= len) && (index1 > 0)) {
	    repLen = _stringSize(aString);
	    repIndex = _intVal(repStart);
	    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
		srcp = _stringVal(aString) + repIndex - 1;
		dstp = _stringVal(self) + index1 - 1;
		if (aString == self) {
		    /* take care of overlapping copy */
		    if (srcp < dstp) {
			/* must do a reverse copy */
			srcp += count;
			dstp += count;
			while (count-- > 0) {
			    *--dstp = *--srcp;
			}
			RETURN (self);
		    }
		}
#ifdef FAST_MEMCPY
		bcopy(srcp, dstp, count);
#else
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
#endif
		RETURN (self);
	    }
	}
    }
%}
.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart
!

replaceAll:oldCharacter by:newCharacter
    "replace all oldCharacters by newCharacter in the receiver"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned oldVal, newVal;

    if (__isCharacter(oldCharacter)
     && __isCharacter(newCharacter)
     && __isString(self)) {
	srcp = (unsigned char *)_stringVal(self);
	oldVal = _intVal(_characterVal(oldCharacter));
	newVal = _intVal(_characterVal(newCharacter));
	while (*srcp) {
	    if (*srcp == oldVal)
		*srcp = newVal;
	    srcp++;
	}
	RETURN ( self );
    }
%}
.
    ^ super replaceAll:oldCharacter by:newCharacter

    "
     'helloWorld' copy replaceAll:$o by:$O 
    "
!

reverse                                                                         
    "in-place reverse the characters of the string."

    "Q: is there a need to redefine it here ?"

%{  /* NOCONTEXT */

    REGISTER char c;
    REGISTER unsigned char *hip, *lowp;

    if (__isString(self)) {
	lowp = _stringVal(self);
	hip = lowp + _stringSize(self) - 1;
	while (lowp < hip) {
	    c = *lowp;
	    *lowp = *hip;
	    *hip = c;
	    lowp++;
	    hip--;
	}
	RETURN ( self );
    }
%}
.
    ^ super reverse
!

atAllPut:aCharacter
    "replace all elements with aCharacter
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#ifndef FAST_MEMSET
    REGISTER unsigned char *dst;
    REGISTER int l;
#endif
    REGISTER int byteValue;

    if (__isCharacter(aCharacter) && __isString(self)) {
	byteValue = _intVal(_characterVal(aCharacter));
#ifdef FAST_MEMSET
	memset(_stringVal(self), byteValue, __qSize(self) - OHDR_SIZE - 1);
#else
	dst = _stringVal(self);
	l = __qSize(self) - OHDR_SIZE - 1;
	while (l >= 8) {
	    dst[0] = dst[1] = dst[2] = dst[3] = byteValue;
	    dst[4] = dst[5] = dst[6] = dst[7] = byteValue;
	    dst += 8;
	    l -= 8;
	}
	while (l--)
	    *dst++ = byteValue;
#endif
	RETURN ( self );
    }
%}
.
    ^ super atAllPut:aCharacter

    "
     (String new:10) atAllPut:$*   
     String new:10 withAll:$*     
    "
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     Notice, this is different from String>>withoutSeparators."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz blank|

    startIndex := 0.
%{
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *cp;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = _stringVal(self);
	while (*cp == ' ') cp++;
	startIndex = _MKSMALLINT(cp - cp0 + 1);
	cp = cp + strlen(cp) - 1;
	while ((cp >= cp0) && (*cp == ' ')) cp--;
	endIndex = _MKSMALLINT(cp - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSpaces].

    startIndex > endIndex ifTrue:[^ ''].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[^ self].
    ^ self copyFrom:startIndex to:endIndex
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     Notice, this is different from String>>withoutSpaces."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }" 
     sz|

    startIndex := 0.
%{
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *cp;
    REGISTER unsigned char c;

    /* ignore instances of subclasses ... */
    if (__qClass(self) == String) {
	cp = cp0 = _stringVal(self);
	c = *cp;
	while (
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	    (c <= ' ') &&
#endif
	    ((c == ' ') || (c == '\n') || (c == '\t')
			|| (c == '\r') || (c == '\f'))
	) {
	    cp++;
	    c = *cp;
	}
	startIndex = _MKSMALLINT(cp - cp0 + 1);
	cp = cp + strlen(cp) - 1;
	while ((cp >= cp0) && (*cp == ' ')) cp--;
	c = *cp;
	while ((cp >= cp0) &&
#ifndef NON_ASCII       /* i.e. EBCDIC ;-) */
	       (c <= ' ') &&
#endif
	       ((c == ' ') || (c == '\n') || (c == '\t')
			   || (c == '\r') || (c == '\f'))) {
	    cp--;
	    c = *cp;
	}
	endIndex = _MKSMALLINT(cp - cp0 + 1);
    }
%}.
    startIndex == 0 ifTrue:[^ super withoutSeparators].

    startIndex > endIndex ifTrue:[^ ''].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[^ self].
    ^ self copyFrom:startIndex to:endIndex
! !

!String methodsFor:'queries'!

encoding
    "assume iso8859 encoding"

    ^ #iso8859
!

knownAsSymbol
    "return true, if there is a symbol with same characters in the
     system - use to check for existance of a symbol without creating one"

%{  /* NOCONTEXT */
    extern OBJ __KNOWNASSYMBOL();

    RETURN ( __KNOWNASSYMBOL(_stringVal(self)) );
%}
!

isEmpty
    "return true if the receiver is empty (i.e. if size == 0)
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ cls;

    cls = __qClass(self);
    if ((cls == String) || (cls == Symbol)) {
	RETURN ( (_stringSize(self) == 0) ? true : false);
    }
%}
.
    ^ super isEmpty
! !