String.st
author claus
Mon, 10 Oct 1994 01:29:28 +0100
changeset 159 514c749165c3
parent 95 d22739a0c6e9
child 186 a4c3032fc825
permissions -rw-r--r--
*** empty log message ***

"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

"Define String as a subclass of AbstractString.
 It declares no named instance variables, no class variables and no
 pool dictionaries, and is filed under the category Collections-Text."

AbstractString subclass:#String
       instanceVariableNames:''
       classVariableNames:''
       poolDictionaries:''
       category:'Collections-Text'
!

String comment:'
COPYRIGHT (c) 1988 by Claus Gittinger
	     All Rights Reserved

$Header: /cvs/stx/stx/libbasic/String.st,v 1.18 1994-10-10 00:28:43 claus Exp $
'!

!String class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 1988 by Claus Gittinger
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

version
"
$Header: /cvs/stx/stx/libbasic/String.st,v 1.18 1994-10-10 00:28:43 claus Exp $
"
!

documentation
"
    Strings are ByteArrays storing Characters.

    Strings are kind of kludgy: to allow for easy handling by C functions,
    there is always one 0-byte added at the end, which is not counted
    in the size. Also, the at:put: method does not allow for storing 0-bytes.
    (To do this, the basicAt:put: and basicNew: methods are redefined.)

    You cannot add any instvars to String, since the run time system & compiler
    create literal strings and know that strings have no named instvars.

    If you really need strings with instVars, you have to create a subclass 
    of String (the access functions defined here can handle this).
    A little warning though: not all Smalltalk systems allow subclassing String,
    so your program may become unportable if you do so.
"
! !

%{
#include <stdio.h>
#include <ctype.h>

/*
 * old st/x creates strings with spaces in it;
 * new st/x will fill it with zeros (for st-80 compatibility)
 * the define below sets old behavior.
 */
#define INITIALIZE_WITH_SPACE
%}

!String class methodsFor:'queries'!

isBuiltInClass
    "answer true for String itself, which is known by the run-time-system;
     answer false for any subclass"

    ^ String == self
! !

!String class methodsFor:'instance creation'!

basicNew:anInteger
    "return a new string with anInteger characters. All characters are
     initialized to spaces and a terminating 0-byte (not counted in the size)
     is stored behind them. Named instance variables of subclass instances
     are initialized to nil.
     If anInteger is not a non-negative SmallInteger, or if the allocation
     fails, the basicNew: of the superclass is invoked, which is expected
     to do the error handling."

%{  /* NOCONTEXT */

    OBJ newString;
    REGISTER int len;
    REGISTER unsigned char *cp;
    REGISTER OBJ *op;
    int nInstVars, instsize;
    extern OBJ new();

    if (_isSmallInteger(anInteger)) {
        len = _intVal(anInteger);
        if (len >= 0) {
            if (self == String) {
                instsize = OHDR_SIZE + len + 1;
                if (_CanDoQuickNew(instsize)) {
                    /*
                     * the most common case
                     */
                    _qCheckedNew(newString, instsize);
                    _InstPtr(newString)->o_class = self;
                    cp = _stringVal(newString);
#ifdef FAST_MEMSET
                    memset(cp, ' ', len);
                    *(cp + len) = '\0';
#else
                    while (len >= 8) {
                        cp[0] = cp[1] = cp[2] = cp[3] = ' ';
                        cp[4] = cp[5] = cp[6] = cp[7] = ' ';
                        cp += 8;
                        len -= 8;
                    }
                    while (len--)
                        *cp++ = ' ';
                    *cp = '\0';
#endif
                    RETURN (newString);
                }
                nInstVars = 0;
            } else {
                /*
                 * subclass instance: the characters are stored behind
                 * the named instance variables
                 */
                nInstVars = _intVal(_ClassInstPtr(self)->c_ninstvars);
                instsize = OHDR_SIZE + __OBJS2BYTES__(nInstVars) + len + 1;
            }

            PROTECT_CONTEXT
            _qNew(newString, instsize, SENDER);
            UNPROTECT_CONTEXT
            if (newString == nil) goto fail;
            _InstPtr(newString)->o_class = self;

            if (nInstVars) {
#if defined(memset4)
                memset4(_InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
                /*
                 * knowing that nil is 0
                 */
                memset(_InstPtr(newString)->i_instvars, 0, __OBJS2BYTES__(nInstVars));
# else
                {
                    /*
                     * count down a copy: nInstVars itself is still needed
                     * below to find the start of the character area
                     */
                    int n = nInstVars;

                    op = _InstPtr(newString)->i_instvars;
                    do {
                        *op++ = nil;
                    } while (--n);
                }
# endif
#endif
                cp = _stringVal(newString) + __OBJS2BYTES__(nInstVars);
            } else {
                cp = _stringVal(newString);
            }

#ifdef FAST_MEMSET
            memset(cp, ' ', len);
            *(cp + len) = '\0';
#else
            while (len >= 8) {
                cp[0] = cp[1] = cp[2] = cp[3] = ' ';
                cp[4] = cp[5] = cp[6] = cp[7] = ' ';
                cp += 8;
                len -= 8;
            }
            while (len--)
                *cp++ = ' ';
            *cp = '\0';
#endif
            RETURN (newString);
        }
    }
fail: ;;
%}
.
    "
     invalid argument, or out-of-memory:
     use error handling in superclass
    "
    ^ (super basicNew:anInteger) atAllPut:(Character space)
!

unititializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end.
     This selector is misspelled; it is kept so that existing senders
     continue to work."

    ^ self basicNew:anInteger
!

uninitializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end.
     Correctly spelled variant of unititializedNew:.
     NOTE(review): presumably the inherited selector being redefined is
     uninitializedNew: - confirm against the superclasses."

    ^ self basicNew:anInteger
! !

!String methodsFor:'accessing'!

basicSize
    "answer the number of characters stored in the receiver.
     The 0-byte at the end is not included in the count."

%{  /* NOCONTEXT */
    OBJ myClass = _qClass(self);

    /* fast path for exact Strings and Symbols */
    if ((myClass == String) || (myClass == Symbol)) {
        RETURN ( _MKSMALLINT(_stringSize(self)) );
    }
%}
.
    "any other class: take the inherited size and subtract the 0-byte"
    ^ super basicSize - 1
!

basicAt:index
    "return the character at position index, an Integer
     - reimplemented here since we return characters.
     Indices are 1-based. For a non-SmallInteger or out-of-range index,
     subscriptBoundsError: is sent."

%{  /* NOCONTEXT */

    REGISTER int indx;
    int len;
    OBJ cls;

    if (_isSmallInteger(index)) {
        indx = _intVal(index);
        /*
         * check the lower bound BEFORE adding the instvar offset;
         * otherwise a zero or small negative index would address
         * the bytes of the named instvars of a subclass instance
         */
        if (indx > 0) {
            cls = _qClass(self);
            if (cls != String)
                indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
            len = _stringSize(self);
            if (indx <= len) {
                RETURN ( _MKCHARACTER(_stringVal(self)[indx-1] & 0xFF) );
            }
        }
    }
%}.
    ^ self subscriptBoundsError:index
!

basicAt:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer
     - reimplemented here since we store characters.
     Only characters with a code in 1..255 can be stored. Returns aCharacter.
     Sends elementNotCharacter for a non-Character argument,
     elementBoundsError for a character code outside 1..255,
     and subscriptBoundsError: otherwise."

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    int len;
    OBJ cls;

    if (__isCharacter(aCharacter)) {
        value = _intVal(_characterVal(aCharacter));
        if ((value > 0)
         && (value <= 255)
         && _isSmallInteger(index)) {
            indx = _intVal(index);
            /*
             * check the lower bound BEFORE adding the instvar offset;
             * otherwise a zero or small negative index would overwrite
             * the bytes of the named instvars of a subclass instance
             */
            if (indx > 0) {
                cls = _qClass(self);
                if (cls != String)
                    indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
                len = _stringSize(self);
                if (indx <= len) {
                    _stringVal(self)[indx-1] = value;
                    RETURN ( aCharacter );
                }
            }
        }
    }
%}.
    (aCharacter isMemberOf:Character) ifFalse:[
        ^ self elementNotCharacter
    ] ifTrue:[
        (aCharacter asciiValue between:1 and:255) ifFalse:[
            ^ self elementBoundsError
        ] ifTrue:[
            ^ self subscriptBoundsError:index
        ]
    ]
! !

!String methodsFor:'converting'!

asSymbol
    "return a unique symbol with name taken from myself.
     The receiver must be an exact String; for instances of subclasses
     primitiveFailed is sent."
%{
    OBJ myClass = _qClass(self);

    if (myClass == String) {
        RETURN ( _MKSYMBOL(_stringVal(self), (OBJ *)0, __context) );
    }
%}
.
    self primitiveFailed
! !

!String class methodsFor:'binary storage'!

binaryDefinitionFrom: stream manager: manager
    "read a string from stream: first fetch a count with nextNumber:4,
     then read that many elements and convert them with asString.
     The manager argument is not used here."

    |count|

    count := stream nextNumber: 4.
    ^ (stream next: count) asString
! !

!String methodsFor:'printing & storing'!

isLiteral
    "answer whether the receiver can be used as a literal
     (i.e. as an element of a constant array); always true here"

    ^ true
!

print
    "print the receiver on standard output.
     Exact Strings are written directly with printf, without using the
     stream classes, so this works even in case of emergency.
     Instances of other classes use the inherited print."

%{  /* NOCONTEXT */

    OBJ myClass = _qClass(self);

    if (myClass == String) {
        printf("%s", _stringVal(self));
        RETURN (self);
    }
%}
.
    ^ super print
!

printfPrintString:formatString
    "non-portable but sometimes useful.
     Return a printed representation of the receiver as specified by formatString, 
     which is defined by printf. The format is expected to contain exactly
     one %s directive; nothing else is checked.
     The result is limited to the size of an internal buffer (800 bytes
     including the terminating 0-byte); longer output is silently truncated.
     If formatString is not a String, primitiveFailed is sent.
     This method is NONSTANDARD and may be removed without notice."

%{  /* STACK: 1000 */

    char buffer[800];
    char *cp;

    if (__isString(formatString)) {
        /*
         * actually only needed on sparc: since thisContext is
         * in a global register, which gets destroyed by printf,
         * manually save it here - very stupid ...
         */
        OBJ sav = __thisContext;

        cp = (char *)_stringVal(self);
        if (_qClass(self) != String)
            cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

        /*
         * bounded formatting: never write beyond the end of buffer;
         * snprintf always 0-terminates for a nonzero size
         */
        snprintf(buffer, sizeof(buffer), (char *)_stringVal(formatString), cp);
        __thisContext = sav;
        RETURN ( _MKSTRING(buffer COMMA_SND) );
    }
%}
.
    self primitiveFailed

    "'hello' printfPrintString:'%%s -> %s'"
    "'hello' printfPrintString:'%%10s -> %10s'"
    "'hello' printfPrintString:'%%-10s -> %-10s'"
!

storeString
    "answer a String which, when evaluated, reconstructs the receiver:
     the receiver enclosed in single quotes, with every embedded
     single quote doubled"

    |quoteCount result pos|

    quoteCount := self occurrencesOf:$'.
    quoteCount == 0 ifTrue:[
        "nothing to double - simply wrap in quotes"
        ^ '''' , self , ''''
    ].

    "two enclosing quotes plus one extra character per embedded quote"
    result := String new:(quoteCount + 2 + self size).
    result at:1 put:$'.
    pos := 2.
    self do:[:ch |
        (ch == $') ifTrue:[
            result at:pos put:ch.
            pos := pos + 1.
        ].
        result at:pos put:ch.
        pos := pos + 1.
    ].
    result at:pos put:$'.
    ^ result
!

storeOn:aStream
    "write the receiver onto aStream enclosed in single quotes,
     doubling every single quote contained in it"

    |quote|

    quote := $'.
    aStream nextPut:quote.
    (self includes:quote) ifFalse:[
        "no quote inside - the characters can be written in one go"
        aStream nextPutAll:self
    ] ifTrue:[
        self do:[:ch |
            (ch == quote) ifTrue:[aStream nextPut:quote].
            aStream nextPut:ch
        ]
    ].
    aStream nextPut:quote
! !

!String methodsFor:'comparing'!

> aString
    "Answer true if the receiver is greater than the argument, false otherwise.
     The bytes are compared with strncmp over the shorter of the two lengths;
     if that prefix is equal, the longer string is the greater one.
     No national variants are honored; use after: for this.
     In contrast to ST-80, case differences are NOT ignored, thus
     'foo' > 'Foo' will return true. 
     This may change.
     Arguments which are neither String, Symbol nor of the class of the
     receiver are passed on to the inherited implementation."

%{  /* NOCONTEXT */

    int myLen, otherLen, commonLen, cmp;
    REGISTER OBJ other = aString;
    char *myChars, *otherChars;
    OBJ otherClass;

    if (_isNonNilObject(other)) {
        otherClass = _qClass(other);
        if ((otherClass == String) || (otherClass == Symbol) || (otherClass == _qClass(self))) {
            myChars = (char *) _stringVal(self);
            myLen = _stringSize(self);
            if (_qClass(self) != String) {
                /* subclass instance: skip the named instvars */
                int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

                myChars += skip;
                myLen -= skip;
            }

            otherChars = (char *) _stringVal(other);
            otherLen = _stringSize(other);
            if (otherClass != String) {
                /* same for the argument */
                int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(otherClass)->c_ninstvars));

                otherChars += skip;
                otherLen -= skip;
            }

            commonLen = (myLen <= otherLen) ? myLen : otherLen;
            cmp = strncmp(myChars, otherChars, commonLen);

            RETURN ( ((cmp > 0) || ((cmp == 0) && (myLen > otherLen))) ? true : false );
        }
    }
%}
.
    ^ super > aString
!

= aString
    "Answer true if the receiver and the argument contain the same
     characters, false otherwise.
     The comparison is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Arguments which are neither String, Symbol nor of the class of the
     receiver are passed on to the inherited implementation."

%{  /* NOCONTEXT */

    int myLen, otherLen;
    REGISTER OBJ other = aString;
    char *myChars, *otherChars;
    OBJ otherClass;

    /* identical object - trivially equal */
    if (other == self) {
        RETURN ( true );
    }
    if (! _isNonNilObject(other)) {
        RETURN ( false );
    }

    otherClass = _qClass(other);
    if ((otherClass == String) || (otherClass == Symbol) || (otherClass == _qClass(self))) {
        myChars = (char *) _stringVal(self);
        myLen = _stringSize(self);
        if (_qClass(self) != String) {
            /* subclass instance: skip the named instvars */
            int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

            myChars += skip;
            myLen -= skip;
        }

        otherChars = (char *) _stringVal(other);
        otherLen = _stringSize(other);
        if (otherClass != String) {
            /* same for the argument */
            int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(otherClass)->c_ninstvars));

            otherChars += skip;
            otherLen -= skip;
        }

        /* different lengths can never be equal; otherwise compare the bytes */
        RETURN ( ((myLen == otherLen)
                  && (strncmp(myChars, otherChars, myLen) == 0)) ? true : false );
    }
%}
.
    ^ super = aString
!

~= aString
    "Answer true if the receiver and the argument do NOT contain the same
     characters, false otherwise.
     The comparison is case-sensitive (i.e. 'Foo' is NOT = 'foo').
     Arguments which are neither String, Symbol nor of the class of the
     receiver are passed on to the inherited implementation."

%{  /* NOCONTEXT */

    int myLen, otherLen;
    REGISTER OBJ other = aString;
    char *myChars, *otherChars;
    OBJ otherClass;

    /* identical object - never unequal */
    if (other == self) {
        RETURN ( false );
    }
    if (! _isNonNilObject(other)) {
        RETURN ( true );
    }

    otherClass = _qClass(other);
    if ((otherClass == String) || (otherClass == Symbol) || (otherClass == _qClass(self))) {
        myChars = (char *) _stringVal(self);
        myLen = _stringSize(self);
        if (_qClass(self) != String) {
            /* subclass instance: skip the named instvars */
            int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

            myChars += skip;
            myLen -= skip;
        }

        otherChars = (char *) _stringVal(other);
        otherLen = _stringSize(other);
        if (otherClass != String) {
            /* same for the argument */
            int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(otherClass)->c_ninstvars));

            otherChars += skip;
            otherLen -= skip;
        }

        /* unequal if the lengths differ or any byte differs */
        RETURN ( ((myLen != otherLen)
                  || (strncmp(myChars, otherChars, myLen) != 0)) ? true : false );
    }
%}
.
    ^ super ~= aString
!

after:aString
    "Answer true if the receiver should come after the argument in a
     sorted list, false otherwise.
     If HAS_STRCOLL is defined, the comparison is done with strcoll and is
     therefore language specific, depending on the value of LC_COLLATE,
     which is initialized from the environment; otherwise strcmp is used.
     For arguments which are neither String, Symbol nor of the class of the
     receiver, primitiveFailed is sent."

%{  /* NOCONTEXT */

    int cmp;
    REGISTER OBJ other = aString;
    char *myChars, *otherChars;
    OBJ otherClass;

    if (_isNonNilObject(other)) {
        otherClass = _qClass(other);
        if ((otherClass == String) || (otherClass == Symbol) || (otherClass == _qClass(self))) {
            myChars = (char *) _stringVal(self);
            if (_qClass(self) != String) {
                /* subclass instance: skip the named instvars */
                myChars += __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
            }

            otherChars = (char *) _stringVal(other);
            if (otherClass != String) {
                /* same for the argument */
                otherChars += __OBJS2BYTES__(_intVal(_ClassInstPtr(otherClass)->c_ninstvars));
            }

#ifdef HAS_STRCOLL
            cmp = strcoll(myChars, otherChars);
#else
            cmp = strcmp(myChars, otherChars);
#endif

            RETURN ( (cmp > 0) ? true : false );
        }
    }
%}.
    "
     currently, this operation is only defined for strings, symbols
     and subclasses.
    "
    self primitiveFailed
! !

!String methodsFor:'character searching'!

occurrencesOf:aCharacter
    "answer how often the argument, aCharacter occurs in the receiver
     - reimplemented here for speed.
     Answers 0 if the argument is not a Character."

%{  /* NOCONTEXT */

    REGISTER unsigned char *scan;
    REGISTER int wanted;
    REGISTER int hits;
    OBJ myClass;

    if (__isCharacter(aCharacter)) {
        wanted = _intVal(_characterVal(aCharacter));
        scan = _stringVal(self);
        myClass = _qClass(self);
        if (myClass != String) {
            /* subclass instance: skip the named instvars */
            scan += __OBJS2BYTES__(_intVal(_ClassInstPtr(myClass)->c_ninstvars));
        }
        /* walk up to the terminating 0-byte */
        for (hits = 0; *scan; scan++) {
            if (*scan == wanted) hits++;
        }
        RETURN ( _MKSMALLINT(hits) );
    }
%}
.
    ^ 0

    "
     'hello world' occurrencesOf:$a
     'hello world' occurrencesOf:$w
     'hello world' occurrencesOf:$l 
     'hello world' occurrencesOf:$x  
     'hello world' occurrencesOf:1 
    "
!

includes:aCharacter
    "return true if the argument, aCharacter is included in the receiver
     - reimplemented here for speed.
     Answers false if the argument is not a Character, or if its code is
     outside 1..255 (such characters can never be stored in a String)."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    extern char *strchr();
    OBJ cls;

    if (__isCharacter(aCharacter)) {
        byteValue = _intVal(_characterVal(aCharacter));
        /*
         * strchr treats the terminating 0-byte as part of the string
         * and converts its second argument to char; without this check
         * the strchr variant would answer true for code 0 and for codes
         * above 255 whose low byte happens to match
         */
        if ((byteValue > 0) && (byteValue <= 255)) {
            cp = _stringVal(self);
            if ((cls = _qClass(self)) != String)
                cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
            cp = (unsigned char *) strchr(cp, byteValue);
            if (cp) {
                RETURN ( true );
            }
#else
            while (*cp) {
                if (*cp == byteValue) {
                    RETURN ( true );
                }
                cp++;
            }
#endif
        }
    }
%}
.
    ^ false

    "
     'hello world' includes:$a
     'hello world' includes:$o  
     'hello world' includes:$x  
     'hello world' includes:1    
    "
!

includesAny:aCollection
    "answer true, if at least one of the characters in the argument,
     aCollection is contained in the receiver.
     - redefined for speed if the argument is a String;
       any other argument is handled by the inherited implementation."

%{  /* NOCONTEXT */

    REGISTER unsigned char *scan;
    REGISTER unsigned char *candidates;
#ifdef SYSV
# define INDEX strchr
#else
# define INDEX index
#endif
    extern char *INDEX();
    OBJ myClass;

    if (__isString(aCollection)) {
        candidates = _stringVal(aCollection);
        scan = _stringVal(self);
        myClass = _qClass(self);
        if (myClass != String) {
            /* subclass instance: skip the named instvars */
            scan += __OBJS2BYTES__(_intVal(_ClassInstPtr(myClass)->c_ninstvars));
        }

        /* look up every character of the receiver in the argument */
        for ( ; *scan; scan++) {
            if (INDEX(candidates, *scan)) {
                RETURN ( true );
            }
        }
        RETURN ( false );
    }
%}
.
    ^ super includesAny:aCollection

    "
     'hello world' includesAny:'abcd'                      
     'hello world' includesAny:'xyz'                      
     'hello world' includesAny:(Array with:$a with:$b with:$d)   
     'hello world' includesAny:(Array with:$x with:$y)     
     'hello world' includesAny:(Array with:1 with:2)    
    "
!

indexOf:aCharacter
    "return the index of the first occurrence of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed.
     Also answers 0 if the argument is not a Character or if its code is
     outside 1..255 (such characters can never be stored in a String)."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    unsigned char *base;
    REGISTER int byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#else
    REGISTER int index;
#endif
    OBJ cls;

    if (__isCharacter(aCharacter)) {
        byteValue = _intVal(_characterVal(aCharacter));
        /*
         * strchr would find the terminating 0-byte for code 0
         * and truncates codes above 255 - exclude both
         */
        if ((byteValue > 0) && (byteValue <= 255)) {
            /*
             * base is the address of the first character; for subclass
             * instances this is behind the named instvars
             */
            base = _stringVal(self);
            if ((cls = _qClass(self)) != String)
                base += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
            cp = (unsigned char *) strchr(base, byteValue);
            if (cp) {
                /* index relative to the first character, not to the instvars */
                RETURN ( _MKSMALLINT(cp - base + 1) );
            }
#else
            cp = base;
            index = 1;
            while (*cp) {
                if (*cp++ == byteValue) {
                    RETURN ( _MKSMALLINT(index) );
                }
                index++;
            }
#endif
        }
    }
%}
.
    ^ 0

    "
     'hello world' indexOf:(Character space)                  
     'hello world' indexOf:$A                      
    "
!

indexOf:aCharacter startingAt:start
    "return the index of the first occurrence of the argument, aCharacter
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed.
     A start index below 1 is treated as 1.
     Answers 0 if aCharacter is not a Character or if its code is outside
     1..255. If start is not a SmallInteger, the inherited implementation
     is used."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    unsigned char *base;
    REGISTER int index, byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#endif
    int len, offs;
    OBJ cls;

    if (_isSmallInteger(start)) {
        if (__isCharacter(aCharacter)) {
            byteValue = _intVal(_characterVal(aCharacter));
            index = _intVal(start);
            if (index <= 0)
                index = 1;
            /*
             * keep the instvar offset of subclass instances separate
             * from the index, so that the answered index is relative
             * to the first character
             */
            offs = 0;
            if ((cls = _qClass(self)) != String)
                offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
            len = _stringSize(self) - offs;
            /*
             * strchr would find the terminating 0-byte for code 0
             * and truncates codes above 255 - exclude both
             */
            if ((index <= len) && (byteValue > 0) && (byteValue <= 255)) {
                base = _stringVal(self) + offs;
                cp = base + index - 1;
#ifdef FAST_STRCHR
                cp = (unsigned char *) strchr(cp, byteValue);
                if (cp) {
                    RETURN ( _MKSMALLINT(cp - base + 1) );
                }
#else
                while (*cp) {
                    if (*cp++ == byteValue) {
                        RETURN ( _MKSMALLINT(index) );
                    }
                    index++;
                }
#endif
            }
        }
        RETURN ( _MKSMALLINT(0) );
    }
%}
.
    ^ super indexOf:aCharacter startingAt:start

    "
     'hello world' indexOf:$l startingAt:1 
     'hello world' indexOf:$l startingAt:5  
    "
!

indexOfSeparatorStartingAt:start
    "return the index of the next separator character (space, tab,
     newline, return or form feed) at or after the index start,
     or 0 if there is none.
     A start index below 1 is treated as 1.
     If start is not a SmallInteger, primitiveFailed is sent."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    unsigned char *base;
    REGISTER char c;
    int len, index, offs;
    OBJ cls;

    /* _intVal is only meaningful for SmallIntegers */
    if (_isSmallInteger(start)) {
        index = _intVal(start);
        if (index <= 0) {
            index = 1;
        }
        /*
         * keep the instvar offset of subclass instances separate
         * from the index, so that the answered index is relative
         * to the first character
         */
        offs = 0;
        if ((cls = _qClass(self)) != String)
            offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
        len = _stringSize(self) - offs;
        if (index > len) {
            RETURN ( _MKSMALLINT(0) );
        }
        base = _stringVal(self) + offs;
        cp = base + index - 1;
        while (c = *cp++) {
#ifdef ASCII
            /* quick pre-check: all separators tested below are <= space */
            if (c <= ' ')
#endif
            if ((c == ' ') || (c == '\t') || (c == '\n')
             || (c == '\r') || (c == '\f')) {
                /* cp has already been advanced, so cp - base is 1-based */
                RETURN ( _MKSMALLINT(cp - base) );
            }
        }
        RETURN ( _MKSMALLINT(0) );
    }
%}
.
    ^ self primitiveFailed

    "
     'hello world' indexOfSeparatorStartingAt:3 
     'hello world' indexOfSeparatorStartingAt:7 
    "
! !

!String methodsFor:'pattern matching'!

startsWith:aString
    "answer true, if the receiver begins with aString.
     A Character argument is compared against the first character;
     any other non-string argument is handled by the inherited implementation."

    aString isString ifFalse: [
        (aString isMemberOf:Character) ifFalse:[
            ^ super startsWith:aString
        ].
        self isEmpty ifTrue:[^ false].
        ^ (self at:1) == aString
    ].
%{
    int mySize, prefixSize;
    REGISTER unsigned char *mine, *prefix;
    REGISTER OBJ other = aString;
    OBJ cls;

    /*
     * both sizes are taken from _qSize, so only their difference matters
     */
    mySize = _qSize(self);
    mine = _stringVal(self);
    cls = _qClass(self);
    if (cls != String) {
        /* subclass instance: skip the named instvars */
        int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

        mySize -= skip;
        mine += skip;
    }

    prefixSize = _qSize(other);
    prefix = _stringVal(other);
    cls = _qClass(other);
    if (cls != String) {
        /* same for the argument */
        int skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));

        prefixSize -= skip;
        prefix += skip;
    }

    /* a longer string cannot be a prefix */
    if (mySize < prefixSize) {
        RETURN ( false );
    }
    for ( ; *prefix; prefix++, mine++) {
        if (*prefix != *mine) {
            RETURN ( false );
        }
    }
%}
.
    ^ true

    "
     'hello world' startsWith:'hello'  
     'hello world' startsWith:'hi'      
    "
!

endsWith:aString
    "return true, if the receiver ends with something, aString.
     A Character argument is compared against the last character;
     any other non-string argument is handled by the inherited implementation."

    aString isString ifFalse: [
        (aString isMemberOf:Character) ifTrue:[
            self isEmpty ifTrue:[^ false].
            ^ (self at:(self size)) == aString
        ].
        ^ super endsWith:aString
    ].
%{
    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    REGISTER OBJ s = aString;
    OBJ cls;

    /*
     * both sizes are taken from _qSize, so only their difference matters
     */
    len1 = _qSize(self);
    src1 = _stringVal(self);
    if ((cls = _qClass(self)) != String) {
        int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
        len1 -= n;
        src1 += n;
    }
    len2 = _qSize(s);
    src2 = _stringVal(s);
    if ((cls = _qClass(s)) != String) {
        int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
        len2 -= n;
        src2 += n;
    }
    if (len1 < len2) {
        RETURN ( false );
    }
    /*
     * src1 and src2 already point behind the named instvars of subclass
     * instances; advance src1 relative to that position instead of
     * recomputing both pointers from _stringVal, which would drop the offsets
     */
    src1 += len1 - len2;
    while (*src2)
        if (*src2++ != *src1++) {
            RETURN ( false );
        }
%}
.
    ^ true

    "
     'hello world' endsWith:'world'
     'hello world' endsWith:'earth'
    "
! !

!String methodsFor:'testing'!

isBlank
    "answer true, if the receiver consists of space characters only
     (an empty receiver is blank as well)"

%{  /* NOCONTEXT */

    REGISTER unsigned char *scan;
    OBJ myClass;

    scan = _stringVal(self);
    myClass = _qClass(self);
    if (myClass != String) {
        /* subclass instance: skip the named instvars */
        scan += __OBJS2BYTES__(_intVal(_ClassInstPtr(myClass)->c_ninstvars));
    }

    /* any non-space character before the 0-byte decides */
    for ( ; *scan; scan++) {
        if (*scan != ' ') {
            RETURN ( false );
        }
    }
%}
.
    ^ true
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. arguments are the costs for
     substitution, case-change, insertion and deletion of a character.
     Returns the accumulated cost as an Integer.
     The primitive handles String and Symbol operands with SmallInteger
     weights; in all other cases, or if the work matrix cannot be
     allocated, the inherited implementation is used.
     Matrix entries are stored as unsigned shorts, so very long strings
     or large weights may wrap around."

%{  /* STACK: 4000 */

    /* 
     * this is very heavy used when correcting errors 
     * (all symbols are searched for best match) - therefore it must be fast
     */
{
    unsigned short *data;
    int l1, l2;
    REGISTER int sz;
    unsigned char *s1, *s2;
    int v1, v2, v3, m;
    int c1, c2;
    REGISTER unsigned short *dp;
    REGISTER int delta;
    REGISTER int j;
    int i;
    int iW, cW, sW, dW;
#   define FASTSIZE 30
    unsigned short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];

    if ((__isString(self) || __isSymbol(self))
     && (__isString(aString) || __isSymbol(aString))
     && _isSmallInteger(insrtWeight) && _isSmallInteger(caseWeight)
     && _isSmallInteger(substWeight) && _isSmallInteger(deleteWeight)) {
        iW = _intVal(insrtWeight);
        cW = _intVal(caseWeight);
        sW = _intVal(substWeight);
        dW = _intVal(deleteWeight);
        s1 = _stringVal(self);
        s2 = _stringVal(aString);
        l1 = strlen(s1);
        l2 = strlen(s2);

        /*
         * the matrix is (sz+1) x (sz+1); small ones live on the stack
         */
        sz = (l1 < l2) ? l2 : l1;
        delta = sz + 1;
        if (sz <= FASTSIZE) {
            data = fastData;
        } else {
            /* add ifdef ALLOCA here ... */
            data = (unsigned short *)malloc(delta * delta * sizeof(short));
        }

        /*
         * out of memory: leave the primitive and let the
         * inherited implementation do the work
         */
        if (data != (unsigned short *)0) {
            /* first row: insertions only */
            data[0] = 0;
            for (j=1, dp=data+1; j<=sz; j++, dp++)
                *dp = *(dp-1) + iW;

            /* first column: deletions only */
            for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
                *dp = *(dp-delta) + dW;

            for (i=1; i<=l1; i++) {
                for (j=1; j<=l2; j++) {
                    dp = data + (i*delta) + j;
                    /*
                     * matrix row i / column j correspond to the
                     * characters at C-index i-1 / j-1
                     */
                    c1 = s1[i-1];
                    c2 = s2[j-1];
                    if (c1 != c2) {
                        if (tolower(c1) == tolower(c2)) {
                            m = cW;
                        } else {
                            m = sW;
                        }
                    } else
                        m = 0;

                    /* take the cheapest of substitute, insert and delete */
                    v2 = *(dp - 1) + iW;
                    v3 = *(dp - delta) + dW;
                    v1 = *(dp - delta - 1) + m;
                    if (v1 < v2) {
                        m = (v1 < v3) ? v1 : v3;
                    } else {
                        m = (v2 < v3) ? v2 : v3;
                    }
                    *dp = m;
                }
            }
            m = data[l1 * delta + l2];
            if (data != fastData)
                free(data);
            RETURN ( _MKSMALLINT(m) );
        }
    }
}
%}
.

    ^ super levenshteinTo:aString 
                        s:substWeight c:caseWeight 
                        i:insrtWeight d:deleteWeight

    "'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'"
! !

!String methodsFor:'copying'!

shallowCopy
    "answer a copy of the receiver.
     Exact Strings are copied with copyFrom:; instances of subclasses
     use the inherited shallowCopy."

    (self isMemberOf:String) ifFalse:[
        ^ super shallowCopy
    ].
    ^ self copyFrom:1
!

deepCopyUsing:aDictionary
    "return a copy of the receiver - reimplemented to be a bit faster.
     Exact Strings are simply copied with copyFrom:. Instances of
     subclasses are handed to the inherited deepCopyUsing: together with
     aDictionary, so that the argument is not lost on the way."

    "
     could be an instance of a subclass which needs deepCopy
     of its named instvars ...
    "
    (self isMemberOf:String) ifTrue:[
        ^ self copyFrom:1
    ].
    ^ super deepCopyUsing:aDictionary
!

simpleDeepCopy
    "answer a copy of the receiver.
     Exact Strings are copied with copyFrom:; instances of subclasses
     (which may have named instvars needing a deep copy) are passed
     to the inherited deepCopy."

    (self isMemberOf:String) ifFalse:[
        ^ super deepCopy
    ].
    ^ self copyFrom:1
!

deepCopy
    "answer a copy of the receiver.
     Exact Strings are copied with copyFrom:; instances of subclasses
     (which may have named instvars needing a deep copy) use the
     inherited deepCopy."

    (self isMemberOf:String) ifFalse:[
        ^ super deepCopy
    ].
    ^ self copyFrom:1
!

, aString
    "return the concatenation of myself and the argument, aString as
     a String.
     - reimplemented here for speed.
     The primitive handles the cases where receiver and argument are of
     the same class, or both are Strings/Symbols; the result is always an
     exact String. In all other cases, or if the allocation fails, the
     inherited implementation is used."

%{
    int l1, l2, sz;
    int n1, n2;
    char *cp1, *cp2;
    REGISTER unsigned char *dstp;
    REGISTER OBJ s = aString;
    REGISTER OBJ _String = String;
    OBJ myClass, argClass, newString;
    OBJ new();

    if (_isNonNilObject(s)) {
        myClass = _qClass(self);
        argClass = _qClass(s);
        /*
         * either same class or both Strings/Symbols:
         * can do it here
         */
        if ((myClass == argClass)
         || (((myClass == _String) || (myClass == Symbol))
             && ((argClass == _String) || (argClass == Symbol)))) {
                /*
                 * n1/n2 are the byte sizes of the named instvars in front
                 * of the characters (0 for exact Strings); they are kept
                 * so that the pointers can be recomputed after allocation
                 */
                n1 = n2 = 0;
                l1 = _stringSize(self);
                if (myClass != _String) {
                    n1 = __OBJS2BYTES__(_intVal(_ClassInstPtr(myClass)->c_ninstvars));
                    l1 -= n1;
                }

                l2 = _stringSize(s);
                if (argClass != _String) {
                    n2 = __OBJS2BYTES__(_intVal(_ClassInstPtr(argClass)->c_ninstvars));
                    l2 -= n2;
                }

                sz = OHDR_SIZE + l1 + l2 + 1;
                _qNew(newString, sz, __context);
                if (newString != nil) {
                    _InstPtr(newString)->o_class = String;
                    dstp = _stringVal(newString);
                    /*
                     * fetch the source pointers only now, in case of a GC
                     * during the allocation - including the instvar offsets,
                     * and through the method argument rather than through
                     * the C copy taken before the allocation
                     */
                    cp1 = (char *) _stringVal(self) + n1;
                    cp2 = (char *) _stringVal(aString) + n2;
#ifdef FAST_MEMCPY
                    bcopy(cp1, dstp, l1);
                    bcopy(cp2, dstp + l1, l2+1);
#else
# ifdef FAST_STRCPY
                    strcpy(dstp, cp1);
                    strcpy(dstp + l1, cp2);
# else
                    while ((*dstp++ = *cp1++) != '\0') ;
                    dstp--;
                    while ((*dstp++ = *cp2++) != '\0') ;
# endif
#endif
                    RETURN ( newString );
                }
        }
    }
%}
.
    ^ super , aString
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected.
     The primitive is only used if the receiver and both arguments pass
     the __isString / __isSymbol checks; the result is then an exact String.
     In all other cases, or if the allocation fails, the strings are
     concatenated with the inherited , operator."

    |newString|
%{
    int len1, len2, len3, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    OBJ new();

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))) {
	len1 = _stringSize(self);
	len2 = _stringSize(string1);
	len3 = _stringSize(string2);
	/* header + all characters + the terminating 0-byte */
	sz = OHDR_SIZE + len1 + len2 + len3 + 1;
	_qNew(newString, sz, __context);
	if (newString != nil) {
	    _InstPtr(newString)->o_class = String;
	    dstp = _stringVal(newString);
	    /*
	     * the source pointers are fetched after the allocation;
	     * the last copy of each variant also transfers the 0-byte
	     */
#ifdef FAST_MEMCPY
	    bcopy(_stringVal(self), dstp, len1);
	    bcopy(_stringVal(string1), dstp + len1, len2);
	    bcopy(_stringVal(string2), dstp + len1 + len2, len3+1);
#else
# ifdef FAST_STRCPY
	    strcpy(dstp, _stringVal(self));
	    strcpy(dstp + len1, _stringVal(string1));
	    strcpy(dstp + len1 + len2, _stringVal(string2));
# else
	    /* each loop copies including the 0-byte; step back over it */
	    srcp = _stringVal(self);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string1);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string2);
	    while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
	    RETURN ( newString );
	}
    }
%}
.
    ^ super , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected.
     The primitive is only used if the receiver and all three arguments
     pass the __isString / __isSymbol checks; the result is then an exact
     String. In all other cases, or if the allocation fails, the strings
     are concatenated with the inherited , operator."

    |newString|
%{
    int len1, len2, len3, len4, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    OBJ new();

    if ((__isString(self) || __isSymbol(self))
     && (__isString(string1) || __isSymbol(string1))
     && (__isString(string2) || __isSymbol(string2))
     && (__isString(string3) || __isSymbol(string3))) {
	len1 = _stringSize(self);
	len2 = _stringSize(string1);
	len3 = _stringSize(string2);
	len4 = _stringSize(string3);
	/* header + all characters + the terminating 0-byte */
	sz = OHDR_SIZE + len1 + len2 + len3 + len4 + 1;
	_qNew(newString, sz, __context);
	if (newString != nil) {
	    _InstPtr(newString)->o_class = String;
	    dstp = _stringVal(newString);
	    /*
	     * the source pointers are fetched after the allocation;
	     * the last copy of each variant also transfers the 0-byte
	     */
#ifdef FAST_MEMCPY
	    bcopy(_stringVal(self), dstp, len1);
	    bcopy(_stringVal(string1), dstp + len1, len2);
	    bcopy(_stringVal(string2), dstp + len1 + len2, len3);
	    bcopy(_stringVal(string3), dstp + len1 + len2 + len3, len4+1);
#else
# ifdef FAST_STRCPY
	    strcpy(dstp, _stringVal(self));
	    strcpy(dstp + len1, _stringVal(string1));
	    strcpy(dstp + len1 + len2, _stringVal(string2));
	    strcpy(dstp + len1 + len2 + len3, _stringVal(string3));
# else
	    /* each loop copies including the 0-byte; step back over it */
	    srcp = _stringVal(self);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string1);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string2);
	    while ((*dstp++ = *srcp++) != '\0') ;
	    dstp--;
	    srcp = _stringVal(string3);
	    while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
	    RETURN ( newString );
	}
    }
%}
.
    ^ super , string1 , string2 , string3
!

copyWith:aCharacter
    "return the concatenation of myself and the argument, aCharacter
     - reimplemented here for speed.
     The result is always an exact String.
     The primitive only handles characters with a code in 1..255
     (the same range basicAt:put: accepts); for anything else, or if the
     allocation fails, the inherited implementation is used."

%{  /* NOCONTEXT */

    int sz, value;
    REGISTER unsigned char *dstp;
    int offs;
    OBJ cls, newString;

    if (__isCharacter(aCharacter)) {
        value = _intVal(_characterVal(aCharacter));
        /*
         * a 0-byte or a code above 255 cannot be stored in a String;
         * leave those to the fallback below
         */
        if ((value > 0) && (value <= 255)) {
            /* one byte more than the receiver ... */
            sz = _qSize(self) + 1;
            if ((cls = _qClass(self)) != String) {
                /* ... but without the named instvars of a subclass instance */
                offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
                sz -= offs;
            } else
                offs = 0;

            PROTECT_CONTEXT
            _qNew(newString, sz, SENDER);
            UNPROTECT_CONTEXT
            if (newString != nil) {
                _InstPtr(newString)->o_class = String;
                dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
                /* number of characters in the receiver */
                sz = sz - OHDR_SIZE - 1 - 1;
                bcopy(_stringVal(self) + offs, dstp, sz);
                dstp += sz;
#else
# ifdef FAST_STRCPY
                strcpy(dstp, _stringVal(self) + offs);
                dstp += sz - OHDR_SIZE - 1 - 1;
# else
                {
                    REGISTER unsigned char *srcp;

                    srcp = _stringVal(self) + offs;
                    while ((*dstp = *srcp++) != '\0')
                        dstp++;
                }
# endif
#endif
                /* dstp now points behind the copied characters */
                *dstp++ = value;
                *dstp = '\0';
                RETURN (newString );
            }
        }
    }
%}.
    "fall back in case of non-character arg;
     will eventually lead to an bad element signal raise"

    ^ super copyWith:aCharacter
!

copyFrom:start to:stop
    "return the substring starting at index start, anInteger and ending
     at stop, anInteger. This method will always return a string, even
     if the receiver is a subclass-instance. This might change if there is a need.
     The primitive only handles SmallInteger indices with
     0 < start <= stop and stop within the receiver; everything else
     (including an empty range and a failed allocation) is passed on
     to the inherited implementation.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, sz, index1, index2;
    OBJ newString;

    if (_isSmallInteger(start) && _isSmallInteger(stop)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	index2 = _intVal(stop);

	if ((index1 <= index2) && (index1 > 0)) {
	    if (_qClass(self) != String) {
		/*
		 * subclass instance: shift both indices behind the bytes
		 * occupied by the named instance variables
		 * (assumes _stringSize includes those bytes - TODO confirm)
		 */
		int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

		index1 += n;
		index2 += n;
	    }
	    if (index2 <= len) {
		count = index2 - index1 + 1;
		PROTECT_CONTEXT
		/* header + characters + terminating 0-byte */
		sz = OHDR_SIZE + count + 1;
		_qNew(newString, sz, SENDER);
		UNPROTECT_CONTEXT
		if (newString != nil) {
		    _InstPtr(newString)->o_class = String;
		    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(_stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
		    /* indices are 1-based, therefore the - 1 */
		    srcp = _stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
#endif
		    RETURN ( newString );
		}
	    }
	}
    }
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start to:stop
!

copyFrom:start
    "return the substring from start, anInteger to the end.
     This method will always return a string, even if the receiver
     is a subclass-instance. This might change if there is a need.
     The primitive only handles a SmallInteger start with
     0 < start <= size; everything else (including a failed
     allocation) is passed on to the inherited implementation.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1, sz;
    OBJ newString;

    if (_isSmallInteger(start)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	if (index1 > 0) {
	    if (_qClass(self) != String) {
		/*
		 * subclass instance: shift the index behind the bytes
		 * occupied by the named instance variables
		 * (assumes _stringSize includes those bytes - TODO confirm)
		 */
		int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));

		index1 += n;
	    }
	    if (index1 <= len) {
		/* everything from index1 up to the last character */
		count = len - index1 + 1;
		PROTECT_CONTEXT
		/* header + characters + terminating 0-byte */
		sz = OHDR_SIZE + count + 1;
		_qNew(newString, sz, SENDER);
		UNPROTECT_CONTEXT
		if (newString != nil) {
		    _InstPtr(newString)->o_class = String;
		    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
		    bcopy(_stringVal(self) + index1 - 1, dstp, count);
		    dstp[count] = '\0';
#else
		    /* indices are 1-based, therefore the - 1 */
		    srcp = _stringVal(self) + index1 - 1;
		    while (count--) {
			*dstp++ = *srcp++;
		    }
		    *dstp = '\0';
#endif
		    RETURN ( newString );
		}
	    }
	}
    }
%}.
    "fall back in case of non-integer index or out-of-bound index;
     will eventually lead to an out-of-bound signal raise"

    ^ super copyFrom:start
! !

!String methodsFor:'filling and replacing'!

replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.
     Answers the receiver. An empty range (stop < start) leaves the
     receiver unchanged.

     The primitive handles String receivers with a String or Symbol
     argument and SmallInteger indices which are within bounds;
     everything else is passed on to the inherited implementation,
     which is responsible for the error handling.

     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp, *dstp;
    REGISTER int count;
    int len, index1, index2;
    int repLen, repIndex;

    if ((__isString(aString) || __isSymbol(aString))
     && __isString(self)
     && _isSmallInteger(start)
     && _isSmallInteger(stop)) {
	len = _stringSize(self);
	index1 = _intVal(start);
	index2 = _intVal(stop);
	count = index2 - index1 + 1;
	if (count <= 0) {
	     RETURN (self);
	}
	/*
	 * repStart must be checked just like start and stop;
	 * decoding a non-SmallInteger with _intVal would give a
	 * meaningless index, which could even pass the bounds check below.
	 */
	if ((index2 <= len) && (index1 > 0) && _isSmallInteger(repStart)) {
	    repLen = _stringSize(aString);
	    repIndex = _intVal(repStart);
	    if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
		/* indices are 1-based, therefore the - 1 */
		srcp = _stringVal(aString) + repIndex - 1;
		dstp = _stringVal(self) + index1 - 1;
		if (aString == self) {
		    /* take care of overlapping copy */
		    if (srcp < dstp) {
			/*
			 * source lies below destination:
			 * must do a reverse copy, to not overwrite
			 * characters which are still to be copied
			 */
			srcp += count;
			dstp += count;
			while (count-- > 0) {
			    *--dstp = *--srcp;
			}
			RETURN (self);
		    }
		}
#ifdef FAST_MEMCPY
		bcopy(srcp, dstp, count);
#else
		while (count-- > 0) {
		    *dstp++ = *srcp++;
		}
#endif
		RETURN (self);
	    }
	}
    }
%}
.
    "fall back in case of non-string arg, non-integer or out-of-bound index"

    ^ super replaceFrom:start to:stop with:aString startingAt:repStart
!

replaceAll:oldCharacter by:newCharacter
    "substitute newCharacter for every occurrence of oldCharacter
     in the receiver. The receiver is changed in place and answered.
     The primitive scans up to the first 0-byte; anything but a String
     receiver with two character arguments is handled by the
     inherited implementation."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER unsigned wanted, replacement;

    if (__isCharacter(oldCharacter)
     && __isCharacter(newCharacter)
     && __isString(self)) {
	wanted = _intVal(_characterVal(oldCharacter));
	replacement = _intVal(_characterVal(newCharacter));
	/* walk over the characters up to the terminating 0-byte */
	for (cp = (unsigned char *)_stringVal(self); *cp; cp++) {
	    if (*cp == wanted) {
		*cp = replacement;
	    }
	}
	RETURN ( self );
    }
%}
.
    ^ super replaceAll:oldCharacter by:newCharacter
!

reverse
    "reverse the order of the receiver's characters in place
     and answer the receiver."

    "Q: does this really have to be redefined here ?"

%{  /* NOCONTEXT */

    REGISTER unsigned char *front, *back;
    REGISTER char tmp;

    if (__isString(self)) {
	front = _stringVal(self);
	/* swap from both ends, until the two pointers meet in the middle */
	for (back = front + _stringSize(self) - 1; front < back; front++, back--) {
	    tmp = *front;
	    *front = *back;
	    *back = tmp;
	}
	RETURN ( self );
    }
%}
.
    ^ super reverse
!

withCRs
    "answer a new String with the characters of the receiver,
     in which every backslash character is replaced by a newline.
     For instances of subclasses, the bytes of the named instance
     variables are skipped. If no new object could be allocated,
     the inherited implementation is used.
     - reimplemented here for speed"

    |newString|
%{
    /* NOTE(review): presumably required by the _qNew macro - confirm */
    OBJ new();
    REGISTER unsigned char *from, *to;
    REGISTER char ch;
    int nBytes, skip;

    nBytes = _qSize(self);
    skip = 0;
    if (_qClass(self) != String) {
	/* characters of a subclass instance start behind its named instvars */
	skip = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
	nBytes -= skip;
    }

    _qNew(newString, nBytes, __context);
    if (newString != nil) {
	_InstPtr(newString)->o_class = String;
	to = _stringVal(newString);
	from = _stringVal(self) + skip;
	/* copy up to the 0-byte, translating on the fly */
	while ((ch = *from++) != '\0') {
	    *to++ = (ch == '\\') ? '\n' : ch;
	}
	*to = '\0';
	RETURN ( newString );
    }
%}
.
    ^ super withCRs
!

atAllPut:aCharacter
    "store aCharacter into every element of the receiver
     and answer the receiver. Non-character arguments and
     non-String receivers are handled by the inherited implementation.
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER int  fill;
#ifndef FAST_MEMSET
    REGISTER unsigned char *p;
    REGISTER int remaining;
#endif

    if (__isCharacter(aCharacter) && __isString(self)) {
	fill = _intVal(_characterVal(aCharacter));
#ifdef FAST_MEMSET
	memset(_stringVal(self), fill, _qSize(self) - OHDR_SIZE - 1);
#else
	p = _stringVal(self);
	remaining = _qSize(self) - OHDR_SIZE - 1;
	/* bulk of the work is done in chunks of 8 bytes ... */
	for (; remaining >= 8; remaining -= 8, p += 8) {
	    p[0] = p[1] = p[2] = p[3] = fill;
	    p[4] = p[5] = p[6] = p[7] = fill;
	}
	/* ... the rest byte by byte */
	for (; remaining; remaining--) {
	    *p++ = fill;
	}
#endif
	RETURN ( self );
    }
%}
.
    ^ super atAllPut:aCharacter

    "
     (String new:10) atAllPut:$*   
     String new:10 withAll:$*     
    "
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces.
     If there is nothing to strip, the receiver itself is returned;
     if the receiver consists of spaces only, an empty string is returned.
     Instances of String are scanned by the primitive below; for
     instances of subclasses, the scan is done in smalltalk.
     Notice, this is different from String>>withoutSeparators."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }"
     sz blank|

    "startIndex remains 0, if the primitive did not do the scan"
    startIndex := 0.
%{
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *cp;

    /* ignore instances of subclasses ... */
    if (_qClass(self) == String) {
	cp = cp0 = _stringVal(self);
	/* skip leading spaces; stops at the 0-byte at the latest */
	while (*cp == ' ') cp++;
	startIndex = _MKSMALLINT(cp - cp0 + 1);
	/* continue with the last character, going backward */
	cp = cp + strlen(cp) - 1;
	while ((cp >= cp0) && (*cp == ' ')) cp--;
	endIndex = _MKSMALLINT(cp - cp0 + 1);
    }
%}
.
    sz := self size.
    startIndex == 0 ifTrue:[
	startIndex := 1.
	endIndex := sz.
	blank := Character space.
	"the leading scan must also look at the last character;
	 otherwise, a receiver consisting of a single space would
	 be returned unchanged"
	[(startIndex <= endIndex) and:[(self at:startIndex) == blank]] whileTrue:[
	    startIndex := startIndex + 1
	].
	[(endIndex > startIndex) and:[(self at:endIndex) == blank]] whileTrue:[
	    endIndex := endIndex - 1
	]
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace.
     If there is nothing to strip, the receiver itself is returned;
     if the receiver consists of whitespace only, an empty string is returned.
     Instances of String are scanned by the primitive below, which treats
     space, newline, tab, return and formfeed as whitespace; for instances
     of subclasses, the scan is done in smalltalk using isSeparator.
     Notice, this is different from String>>withoutSpaces."

    |startIndex "{ Class: SmallInteger }"
     endIndex   "{ Class: SmallInteger }"
     sz|

    "startIndex remains 0, if the primitive did not do the scan"
    startIndex := 0.
%{
    REGISTER unsigned char *cp0;
    REGISTER unsigned char *cp;
    REGISTER unsigned char c;

    /* ignore instances of subclasses ... */
    if (_qClass(self) == String) {
	cp = cp0 = _stringVal(self);
	/* skip leading whitespace; stops at the 0-byte at the latest */
	c = *cp;
	while ((c == ' ') || (c == '\n') || (c == '\t')
			  || (c == '\r') || (c == '\f')) {
	    cp++;
	    c = *cp;
	}
	startIndex = _MKSMALLINT(cp - cp0 + 1);
	/*
	 * continue with the last character, going backward.
	 * The character is only fetched after the bounds check,
	 * so nothing in front of the string is ever read.
	 */
	cp = cp + strlen(cp) - 1;
	while (cp >= cp0) {
	    c = *cp;
	    if ((c == ' ') || (c == '\n') || (c == '\t')
			   || (c == '\r') || (c == '\f')) {
		cp--;
	    } else {
		break;
	    }
	}
	endIndex = _MKSMALLINT(cp - cp0 + 1);
    }
%}
.
    sz := self size.
    startIndex == 0 ifTrue:[
	startIndex := 1.
	endIndex := sz.
	"the leading scan must also look at the last character;
	 otherwise, a receiver consisting of a single separator would
	 be returned unchanged"
	[(startIndex <= endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
	    startIndex := startIndex + 1
	].
	[(endIndex > startIndex) and:[(self at:endIndex) isSeparator]] whileTrue:[
	    endIndex := endIndex - 1
	].
    ].
    startIndex > endIndex ifTrue:[
	^ ''
    ].
    ((startIndex == 1) and:[endIndex == sz]) ifTrue:[
	^ self
    ].
    ^ self copyFrom:startIndex to:endIndex
! !

!String methodsFor:'queries'!

encoding
    "answer a symbol naming the character encoding of the receiver.
     Strings are assumed to be iso8859 encoded."

    ^ #iso8859
!

knownAsSymbol
    "answer true, if a symbol consisting of the same characters exists
     in the system. This can be used to check for the existence of a
     symbol without creating one as a side effect."

%{  /* NOCONTEXT */
    extern OBJ _KNOWNASSYMBOL();
    OBJ answer;

    /* the lookup is done by the runtime system, given the C-string */
    answer = _KNOWNASSYMBOL(_stringVal(self));
    RETURN ( answer );
%}
!

isEmpty
    "answer true, if the receiver contains no characters (i.e. its size is 0).
     The primitive only handles instances of String and Symbol;
     instances of other classes are handled by the inherited implementation.
     Redefined here for performance"

%{  /* NOCONTEXT */
    OBJ myClass;

    myClass = _qClass(self);
    if ((myClass == String) || (myClass == Symbol)) {
	if (_stringSize(self) == 0) {
	    RETURN ( true );
	}
	RETURN ( false );
    }
%}
.
    ^ super isEmpty
! !