String.st
author claus
Fri, 16 Jul 1993 11:39:45 +0200
changeset 1 a27a279701f8
child 2 6526dde5f3ac
permissions -rw-r--r--
Initial revision

"
 COPYRIGHT (c) 1988-93 by Claus Gittinger
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"

ByteArray subclass:#String
       instanceVariableNames:''
       classVariableNames:''
       poolDictionaries:''
       category:'Collections-Text'
!

String comment:'

COPYRIGHT (c) 1988-93 by Claus Gittinger
             All Rights Reserved

%W% %E%
'!

%{
#include <stdio.h>
#include <ctype.h>
%}

!String class methodsFor:'documentation'!

documentation
"
Strings are ByteArrays storing Characters.

Strings are kind of kludgy: to allow for easy handling by c-functions,
there is always one 0-byte added at the end, which is not counted
in size. also, the at:put: method does not allow for storing 0-bytes.
(to do this, the basicAt:put: and basicNew: methods are redefined)

You cannot add any instvars to String, since the code (also in the run time
system & compiler) knows that strings have no named instvars. If you need
to, you have to create a subclass.
"
! !

!String class methodsFor:'instance creation'!

basicNew:anInteger
    "return a new empty string with anInteger characters"

%{  /* NOCONTEXT */

    OBJ newString;
    REGISTER int len;
    REGISTER unsigned char *cp;
    REGISTER OBJ *op;
    int nInstVars, instsize;
    extern OBJ new();

    if (_isSmallInteger(anInteger)) {
        len = _intVal(anInteger);
        if (len >= 0) {
            nInstVars = _intVal(_ClassInstPtr(self)->c_ninstvars);
            instsize = OHDR_SIZE + (nInstVars * sizeof(OBJ)) + len + 1;
            PROTECT(self);
            _qNew(newString, instsize, SENDER);
            UNPROTECT(self);
            _InstPtr(newString)->o_class = self;

            if (nInstVars) {
#if defined(memset4)
                memset4(_InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
                /*
                 * knowing that nil is 0
                 */
                memset(_InstPtr(newString)->i_instvars, 0, instsize - OHDR_SIZE);
# else
                op = _InstPtr(newString)->i_instvars;
                do {
                    *op++ = nil;
                } while (--nInstVars);
# endif
#endif
                cp = _stringVal(newString) + (nInstVars * sizeof(OBJ));
            } else
                cp = _stringVal(newString);

#ifdef FAST_MEMSET
            memset(cp, ' ', len);
            *(cp + len) = '\0';
#else
            while (len--)
                *cp++ = ' ';
            *cp = '\0';
#endif
            RETURN (newString);
        }
    }
%}
.
    ^ (super basicNew:anInteger) atAllPut:(Character space)
!

new:anInteger
    "same as basicNew - to avoid another send"

    ^ self basicNew:anInteger
!

basicNew
    "return a new empty string"

    ^ self basicNew:0
!

new
    "return a new empty string"

    ^ self basicNew:0
!

unititializedNew:anInteger
    "redefine it back - strings must have a 0-byte at the end"

    ^ self basicNew:anInteger
!

fromString:aString
    "return a copy of the argument, aString"

    ^ aString copyFrom:1 to:(aString size)
! !

!String methodsFor:'accessing'!

basicSize
    "return the number of characters in myself"

%{  /* NOCONTEXT */

    if ((_qClass(self) == String) || (_qClass(self) == Symbol)) {
        RETURN ( _MKSMALLINT(_stringSize(self)) );
    }
%}
.
    ^ super basicSize - 1
!

size
    "return the number of characters in myself
     - reimplemented here to avoid double send (size -> basicSize)"

%{  /* NOCONTEXT */

    if ((_qClass(self) == String) || (_qClass(self) == Symbol)) {
        RETURN ( _MKSMALLINT(_stringSize(self)) );
    }
%}
.
    ^ super basicSize - 1
!

basicAt:index
    "return the character at position index, an Integer
     - reimplemented here since we return characters"

%{  /* NOCONTEXT */

    REGISTER int indx;
    int len;

    if (_isSmallInteger(index)) {
        indx = _intVal(index);
        if (_qClass(self) != String)
            indx += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        len = _stringSize(self);
        if ((indx > 0) && (indx <= len)) {
            RETURN ( _MKCHARACTER(_stringVal(self)[indx-1] & 0xFF) );
        }
    }
%}
.
    self subscriptBoundsError:index
!

basicAt:index put:aCharacter
    "store the argument, aCharacter at position index, an Integer
     - reimplemented here since we store characters"

%{  /* NOCONTEXT */

    REGISTER int value, indx;
    int len;

    if (_isCharacter(aCharacter)) {
        value = _intVal(_characterVal(aCharacter));
        if (value && _isSmallInteger(index)) {
            indx = _intVal(index);
            if (_qClass(self) != String)
                indx += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
            len = _stringSize(self);
            if ((indx > 0) && (indx <= len)) {
                _stringVal(self)[indx-1] = value;
                RETURN ( aCharacter );
            }
        }
    }
%}
.
    (aCharacter isMemberOf:Character) ifFalse:[
        self elementNotCharacter
    ] ifTrue:[
        (aCharacter asciiValue == 0) ifTrue:[
            self error:'0-character not allowed in strings'
        ] ifFalse:[
            self subscriptBoundsError:index
        ]
    ]
! !

!String methodsFor:'converting'!

asUppercase
    "return a copy of myself in uppercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
        newStr at:i put:(self at:i) asUppercase
    ].
    ^newStr
!

asLowercase
    "return a copy of myself in lowercase letters"

    |newStr
     mySize "{ Class: SmallInteger }" |

    mySize := self size.
    newStr := self species new:mySize.
    1 to:mySize do:[:i |
        newStr at:i put:(self at:i) asLowercase
    ].
    ^newStr
!

asString
    "return myself - I am a string"

    ^ self
!

asSymbol
    "return a unique symbol with name taken from myself.
     The argument must be a String, subclass instances are not allowed."
%{
    if (_qClass(self) == String) {
        RETURN ( _MKSYMBOL(_stringVal(self), (OBJ *)0, __context) );
    }
%}
.
    self primitiveFailed
!

asText
    "return a Text-object (collection of lines) from myself"

    ^ Text from:self
!

asNumber
    "read a number from the receiver"

    ^ Number readFromString:self

    "'123' asNumber"
    "'123.567' asNumber"
    "'(5/6)' asNumber"
!

asFilename
    "return a Filename with pathname taken from the receiver"

    ^ Filename named:self
! !

!String methodsFor:'printing & storing'!

printOn:aStream
    "print the receiver on aStream"

    aStream nextPutAll:self
!

printString
    "return a string for printing - thats myself"

    ^ self
!

print
    "print the receiver on standard output - for debugging only"

%{  /* NOCONTEXT */

    if (_qClass(self) == String) {
        printf("%s", _stringVal(self));
        RETURN (self);
    }
%}
.
    super print
!

printfPrintString:formatString
    "non-portable but sometimes useful.
     return a printed representation of the receiver
     as specified by formatString, which is defined by printf.
     No checking on overrunning the buffer - the result must be shorter than 8k chars"

%{  /* NOCONTEXT */

    char buffer[8192];
    char *cp;

    if (_isString(formatString)) {
#ifdef THIS_CONTEXT
        /* mhmh - sprintf seems to destroy thisContext (if its in a register) */
        OBJ sav = __thisContext;
#endif
        cp = (char *)_stringVal(self);
        if (_qClass(self) != String)
            cp += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

        sprintf(buffer, (char *)_stringVal(formatString), cp);
#ifdef THIS_CONTEXT
        __thisContext = sav;
#endif
        RETURN ( _MKSTRING(buffer COMMA_SND) );
    }
%}
.
    self primitiveFailed

    "'hello' printfPrintString:'%%s -> %s'"
    "'hello' printfPrintString:'%%10s -> %10s'"
    "'hello' printfPrintString:'%%-10s -> %-10s'"
!

displayString
    "return a string to display the receiver - use storeString to have
     quotes around"

    ^ self storeString
!

storeString
    "return a String for storing myself"

    |s|

    (self includes:$') ifTrue:[
        s := ''''.
        self do:[:thisChar |
            (thisChar == $') ifTrue:[s := s , ''''].
            s := s copyWith:thisChar
        ].
        s := s , ''''.
        ^ s
    ].
    ^ '''' asString , self , '''' asString
!

storeOn:aStream
    "put the storeString of myself on aStream"

    aStream nextPut: $'.
    (self includes:$') ifTrue:[
        self do:[:thisChar |
            (thisChar == $') ifTrue:[aStream nextPut:thisChar].
            aStream nextPut:thisChar
        ]
    ] ifFalse:[
        aStream nextPutAll:self
    ].
    aStream nextPut:$'
! !

!String methodsFor:'comparing'!

hash
    "return an integer useful as a hash-key"

%{  /* NOCONTEXT */

    REGISTER int g, val;
    REGISTER unsigned char *cp, *cp0;
    int l;

    cp = _stringVal(self);
    l = _stringSize(self);
    if (_qClass(self) != String) {
        int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

        cp += n;
        l -= n;
    }

    /*
     * this is the dragon-book algorithm with a funny start
     * value (to give short strings a number above 8192)
     */
    val = 12345;
    for (cp0 = cp, cp += l - 1; cp >= cp0; cp--) {
        val = (val << 5) + (*cp & 0x1F);
        if (g = (val & 0x3E000000))
            val ^= g >> 25 /* 23 */ /* 25 */;
        val &= 0x3FFFFFFF;
    }

    if (l) {
        l |= 1; 
        val = (val * l) & 0x3FFFFFFF;
    }

    RETURN ( _MKSMALLINT(val) );
%}
!

<= something
    "Compare the receiver with the argument and return true if the
     receiver is less than or equal to the argument. Otherwise return false."

    ^ (self > something) not
!

< something
    "Compare the receiver with the argument and return true if the
     receiver is less than the argument. Otherwise return false."

    ^ (something > self)
!

>= something
    "Compare the receiver with the argument and return true if the
     receiver is greater than or equal to the argument.
     Otherwise return false."

    ^ (something > self) not
!

> aString
    "Compare the receiver with the argument and return true if the
     receiver is greater than the argument. Otherwise return false."

%{  /* NOCONTEXT */

    int len1, len2, cmp;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;

    if (_isNonNilObject(s)
     && ((_qClass(s) == String) || (_qClass(s) == Symbol) || (_qClass(s) == _qClass(self)))) {
        cp1 = (char *) _stringVal(self);
        len1 = _stringSize(self);
        if (_qClass(self) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

            cp1 += n;
            len1 -= n;
        }

        cp2 = (char *) _stringVal(s);
        len2 = _stringSize(s);
        if (_qClass(s) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);

            cp2 += n;
            len2 -= n;
        }

        if (len1 <= len2)
            cmp = strncmp(cp1, cp2, len1);
        else
            cmp = strncmp(cp1, cp2, len2);

        if (cmp > 0) {
            RETURN ( true );
        }
        if ((cmp == 0) && (len1 > len2)) {
            RETURN ( true );
        }
        RETURN ( false );
    }
%}
.
    ^ super > aString
!

= aString
    "Compare the receiver with the argument and return true if the
     receiver is equal to the argument. Otherwise return false."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;

    if (s == self) {
        RETURN ( true );
    }
    if (! _isNonNilObject(s)) {
        RETURN ( false );
    }

    if ((_qClass(s) == String) || (_qClass(s) == Symbol) || (_qClass(s) == _qClass(self))) {
        cp1 = (char *) _stringVal(self);
        l1 = _stringSize(self);
        if (_qClass(self) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

            cp1 += n;
            l1 -= n;
        }

        cp2 = (char *) _stringVal(s);
        l2 = _stringSize(s);
        if (_qClass(s) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);

            cp2 += n;
            l2 -= n;
        }

        if (l1 != l2) {
            RETURN ( false );
        }
        RETURN ( (strncmp(cp1, cp2, l1) == 0) ? true : false );
    }
%}
.
    ^ super = aString
!

~= aString
    "Compare the receiver with the argument and return true if the
     receiver is not equal to the argument. Otherwise return false."

%{  /* NOCONTEXT */

    int l1, l2;
    REGISTER OBJ s = aString;
    char *cp1, *cp2;

    if (s == self) {
        RETURN ( false );
    }
    if (! _isNonNilObject(s)) {
        RETURN ( true );
    }
    if ((_qClass(s) == String) || (_qClass(s) == Symbol) || (_qClass(s) == _qClass(self))) {
        cp1 = (char *) _stringVal(self);
        l1 = _stringSize(self);
        if (_qClass(self) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

            cp1 += n;
            l1 -= n;
        }

        cp2 = (char *) _stringVal(s);
        l2 = _stringSize(s);
        if (_qClass(s) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);

            cp2 += n;
            l2 -= n;
        }

        if (l1 != l2) {
            RETURN ( true );
        }
        RETURN ( (strncmp(cp1, cp2, l1) == 0) ? false : true );
    }
%}
.
    ^ super ~= aString
! !

!String methodsFor:'testing'!

occurrencesOf:aCharacter
    "count the occurrences of the argument, aCharacter in myself
      - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    REGISTER int count;

    if (_isCharacter(aCharacter)) {
        count = 0;
        byteValue = _intVal(_characterVal(aCharacter));
        cp = _stringVal(self);
        if (_qClass(self) != String)
            cp += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        while (*cp) {
            if (*cp++ == byteValue) count++;
        }
        RETURN ( _MKSMALLINT(count) );
    }
%}
.
    ^ 0
!

includes:aCharacter
    "return true if the argument, aCharacter is included in the receiver
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int byteValue;
    extern char *strchr();

    if (_isCharacter(aCharacter)) {
        byteValue = _intVal(_characterVal(aCharacter));
        cp = _stringVal(self);
        if (_qClass(self) != String)
            cp += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
#ifdef FAST_STRCHR
        cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
        if (cp) {
            RETURN ( true );
        }
#else
        while (*cp) {
            if (*cp == byteValue) {
                RETURN ( true );
            }
            cp++;
        }
#endif
    }
%}
.
    ^ false
!

indexOf:aCharacter
    "return the index of the first occurrences of the argument, aCharacter
     in the receiver or 0 if not found - reimplemented here for speed."

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
#ifdef FAST_STRCHR
    char *strchr();
#else
    REGISTER int byteValue;
    REGISTER int index;
#endif

    if (_isCharacter(aCharacter)) {
        cp = _stringVal(self);
        if (_qClass(self) != String)
            cp += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
#ifdef FAST_STRCHR
        cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
        if (cp) {
            RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
        }
#else
        byteValue = _intVal(_characterVal(aCharacter));
        index = 1;
        while (*cp) {
            if (*cp++ == byteValue) {
                RETURN ( _MKSMALLINT(index) );
            }
            index++;
        }
#endif
    }
%}
.
    ^ 0
!

indexOf:aCharacter startingAt:start
    "return the index of the first occurrence of the argument, aCharacter
     in myself starting at start, anInteger or 0 if not found;
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER int index, byteValue;
#ifdef FAST_STRCHR
    char *strchr();
#endif
    int len;

    if (_isSmallInteger(start)) {
        if (_isCharacter(aCharacter)) {
            byteValue = _intVal(_characterVal(aCharacter));
            index = _intVal(start);
            if (index <= 0)
                index = 1;
            if (_qClass(self) != String)
                index += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
            len = _stringSize(self);
            if (index <= len) {
                cp = _stringVal(self) + index - 1;
#ifdef FAST_STRCHR
                cp = (unsigned char *) strchr(cp, byteValue);
                if (cp) {
                    RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
                }
#else
                while (*cp) {
                    if (*cp++ == byteValue) {
                        RETURN ( _MKSMALLINT(index) );
                    }
                    index++;
                }
#endif
            }
        }
        RETURN ( _MKSMALLINT(0) );
    }
%}
.
    ^ super indexOf:aCharacter startingAt:start
!

indexOfSeparatorStartingAt:start
    "return the index of the next separator character"

%{  /* NOCONTEXT */

    REGISTER unsigned char *cp;
    REGISTER char c;
    int len, index;

    index = _intVal(start);
    if (index <= 0) {
        index = 1;
    }
    if (_qClass(self) != String)
        index += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
    len = _stringSize(self);
    if (index > len) {
        RETURN ( _MKSMALLINT(0) );
    }
    cp = _stringVal(self) + index - 1;
    while (c = *cp++) {
#ifdef ASCII
        if (c <= ' ')
#endif
        if ((c == ' ') || (c == '\t') || (c == '\n')
         || (c == '\r') || (c == '\f')) {
            RETURN ( _MKSMALLINT(cp - _stringVal(self)) );
        }
    }
%}
.
    ^ 0
!

includesMatchCharacters
    "return true if the receiver includes any meta characters (i.e. $* or $#) 
     for match operations; false if not"

    (self includes:$*) ifTrue:[^ true].
    ^ (self includes:$#)
!

from:matchStart to:matchStop match:aString from:start to:stop
    "helper for match; return true if the characters from start to stop in
     aString are matching the receivers characters from matchStart to matchStop.
     The receiver may contain meta-match characters $* (to match any string) 
     or $# (to match any character)."

    |matchChar mStart mStop sStart sStop mSize sSize index cont matchLast|

    mStart := matchStart.
    mStop := matchStop.
    sStart := start.
    sStop := stop.

    [true] whileTrue:[
        mSize := mStop - mStart + 1.
        sSize := sStop - sStart + 1.

        "empty strings match"
        (mSize == 0) ifTrue:[^ (sSize == 0)].

        matchChar := self at:mStart.

        (matchChar == $#) ifTrue:[
            "testString empty -> no match"
            (sSize == 0) ifTrue:[^ false].
            "# matches single character"
            ((sSize == 1) and:[mSize == 1]) ifTrue:[^ true].
            "cut off 1st chars and continue"
            mStart := mStart + 1.
            sStart := sStart + 1
        ] ifFalse:[
            (matchChar == $*) ifTrue:[
                "testString empty -> we have a match"
                (sSize == 0) ifTrue:[^ true].
                "* matches anything"
                (mSize == 1) ifTrue:[^ true].

                "try to avoid some of the recursion by checking last
                 character and continue with shortened strings if possible"
                cont := false.
                (mStop >= mStart) ifTrue:[
                    matchLast := self at:mStop.
                    (matchLast ~~ $*) ifTrue:[
                        (matchLast == $#) ifTrue:[
                            cont := true
                        ] ifFalse:[
                            (matchLast == (aString at:sStop)) ifTrue:[
                                cont := true
                            ]
                        ]
                    ]
                ].
                cont ifFalse:[
                    index := sStart.
                    [index <= sStop] whileTrue:[
                        (self from:(mStart + 1) to:mStop match:aString 
                              from:index to:sStop) ifTrue:[
                            ^ true
                        ].
                        index := index + 1
                    ].
                    ^ false
                ].
                mStop := mStop - 1.
                sStop := sStop - 1
            ] ifFalse:[

                "testString empty ?"
                (sSize == 0) ifTrue:[^ false].

                "first characters equal ?"
                ((aString at:sStart) ~~ matchChar) ifTrue:[^ false].

                "avoid recursion if possible"
                ((sSize == mSize) and:[self = aString]) ifTrue:[^ true].

                "cut off 1st chars and continue"
                mStart := mStart + 1.
                sStart := sStart + 1
            ]
        ]
    ]
!

match:aString
    "return true if aString matches self, where self may contain meta-match 
     characters $* (to match any string) or $# (to match any character)."

    ^ self from:1 to:(self size) match:aString from:1 to:(aString size)

    " '*ute*' match:'computer' "
    " '*uter' match:'computer' "
    " 'uter*' match:'computer' "
!

startsWith:aString
    "return true, if the receiver starts with something, aString"

    (aString isKindOf:String) ifFalse: [
        (aString isMemberOf:Character) ifTrue:[
            self isEmpty ifTrue:[^ false].
            ^ (self at:1) == aString
        ].
        ^ super startsWith:aString
    ].
%{
    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    REGISTER OBJ s = aString;

    len1 = _qSize(self);
    src1 = _stringVal(self);
    if (_qClass(self) != String) {
        int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        len1 -= n;
        src1 += n;
    }
    len2 = _qSize(s);
    src2 = _stringVal(s);
    if (_qClass(s) != String) {
        int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);
        len2 -= n;
        src2 += n;
    }
    if (len1 < len2) {
        RETURN ( false );
    }
    while (*src2)
        if (*src2++ != *src1++) {
            RETURN ( false );
        }
%}
.
    ^ true

    "'hello world' startsWith:'hello'"
    "'hello world' startsWith:'hi'"
!

endsWith:aString
    "return true, if the receiver end with something, aString"

    (aString isKindOf:String) ifFalse: [
        (aString isMemberOf:Character) ifTrue:[
            self isEmpty ifTrue:[^ false].
            ^ (self at:(self size)) == aString
        ].
        ^ super endsWith:aString
    ].
%{
    int len1, len2;
    REGISTER unsigned char *src1, *src2;
    REGISTER OBJ s = aString;

    len1 = _qSize(self);
    src1 = _stringVal(self);
    if (_qClass(self) != String) {
        int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        len1 -= n;
        src1 += n;
    }
    len2 = _qSize(s);
    src2 = _stringVal(s);
    if (_qClass(s) != String) {
        int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);
        len2 -= n;
        src2 += n;
    }
    if (len1 < len2) {
        RETURN ( false );
    }
    src1 = _stringVal(self) + len1 - len2;
    src2 = _stringVal(aString);
    while (*src2)
        if (*src2++ != *src1++) {
            RETURN ( false );
        }
%}
.
    ^ true

    "'hello world' endsWith:'world'"
    "'hello world' endsWith:'earth'"
!

isBlank
    "return true, if the receiver contains spaces only"

%{  /* NOCONTEXT */

    REGISTER unsigned char *src;

    src = _stringVal(self);
    if (_qClass(self) != String)
        src += _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

    while (*src)
        if (*src++ != ' ') {
            RETURN ( false );
        }
%}
.
    ^ true
!

countWords
    "return the number of words, which are separated by separators"

    |tally start stop mySize|

    tally := 0.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
         (self at:start) isSeparator ifTrue:[
             start := start + 1
         ] ifFalse:[
             stop := self indexOfSeparatorStartingAt:start.
             (stop == 0) ifTrue:[
                 stop := mySize + 1
             ].
             tally := tally + 1.
             start := stop
         ]
    ].
    ^ tally
!

asCollectionOfWords
    "return a collection containing the words (separated by whitespace) of the receiver"

    |words start stop mySize|

    words := OrderedCollection new.
    start := 1.
    mySize := self size.
    [start <= mySize] whileTrue:[
        (self at:start) isSeparator ifTrue:[
            start := start + 1
        ] ifFalse:[
            stop := self indexOfSeparatorStartingAt:start.
            stop == 0 ifTrue:[
                words add:(self copyFrom:start to:mySize).
                ^ words
            ].
            words add:(self copyFrom:start to:(stop - 1)).
            start := stop
        ]
    ].
    ^ words
!

levenshteinTo:aString
    "return the levenshtein distance to the argument, aString;
     this value corrensponds to the number of replacements that have to be
     made to get aString from the receiver.
     see IEEE transactions on Computers 1976 Pg 172 ff."

    ^ self levenshteinTo:aString s:4 c:1 i:2 d:6
!

levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
    "parametrized levenshtein. arguments are the costs for
     substitution, case-change, insertion and deletion of a character."

    |d  "delta matrix"
     len1 len2 dim prevRow row col dimPlus1
     min pp c1 c2|

%{  /* NOCONTEXT */

    /* 
     * this is very heavy used when correcting errors 
     * (all symbols are searched for best match) - therefore it must be fast
     */
{
    unsigned short *data;
    int l1, l2;
    REGISTER int sz;
    unsigned char *s1, *s2;
    int v1, v2, v3, m;
    REGISTER unsigned short *dp;
    REGISTER int delta;
    REGISTER int j;
    int i;
    int iW, cW, sW, dW;
#   define FASTSIZE 30
    short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];

    if ((_isString(self) || _isSymbol(self))
     && (_isString(aString) || _isSymbol(aString))
     && _isSmallInteger(insrtWeight) && _isSmallInteger(caseWeight)
     && _isSmallInteger(substWeight) && _isSmallInteger(deleteWeight)) {
        iW = _intVal(insrtWeight);
        cW = _intVal(caseWeight);
        sW = _intVal(substWeight);
        dW = _intVal(deleteWeight);
        s1 = _stringVal(self);
        s2 = _stringVal(aString);
        l1 = strlen(s1);
        l2 = strlen(s2);

        sz = (l1 < l2) ? l2 : l1;
        delta = sz + 1;
        if (sz <= FASTSIZE) {
            data = fastData;
        } else {
            /* add ifdef ALLOCA here ... */
            data = (unsigned short *)malloc(delta * delta * sizeof(short));
        }
        
        data[0] = 0;
        dp = data+1;
        for (j=1, dp=data+1; j<=sz; j++, dp++)
            *dp = *(dp-1) + iW;
        
        for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
            *dp = *(dp-delta) + dW;

        for (i=1; i<=l1; i++) {
            for (j=1; j<=l2; j++) {
                dp = data + (i*delta) + j;
                if (s1[i] != s2[j]) {
                    if (tolower(s1[i]) == tolower(s2[j])) {
                        m = cW;
                    } else {
                        m = sW;
                    }
                } else
                    m = 0;

                v2 = *(dp - 1) + iW;
                v3 = *(dp - delta) + dW;
                v1 = *(dp - delta - 1) + m;
                if (v1 < v2)
                    if (v1 < v3)
                        m = v1;
                    else
                        m = v3;
                else
                    if (v2 < v3)
                        m = v2;
                    else
                        m = v3;
                *dp = m;
            }
        }
        m = data[l1 * delta + l2];
        if (sz > FASTSIZE) 
            free(data);
        RETURN ( _MKSMALLINT(m) );
    }
}
%}
.
    len1 := self size.
    len2 := aString size.

    "create the help-matrix"

    dim := len1 max:len2.
    dimPlus1 := dim + 1.

    d := Array new:dimPlus1.
    1 to:dimPlus1 do:[:i |
        d at:i put:(Array new:dimPlus1)
    ].

    "init help-matrix"

    (d at:1) at:1 put:0.
    row := d at:1.
    1 to:dim do:[:j |
        row at:(j + 1) put:( (row at:j) + insrtWeight )
    ].

    1 to:dim do:[:i |
         (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
    ].

    1 to:len1 do:[:i |
        c1 := self at:i.
        1 to:len2 do:[:j |
            c2 := aString at:j.
            (c1 == c2) ifTrue:[
                pp := 0
            ] ifFalse:[
                (c1 asLowercase == c2 asLowercase) ifTrue:[
                    pp := caseWeight
                ] ifFalse:[
                    pp := substWeight
                ]
            ].
            prevRow := d at:i.
            row := d at:(i + 1).
            col := j + 1.
            min := (prevRow at:j) + pp.
            min := min min:( (row at:j) + insrtWeight).
            min := min min:( (prevRow at:col) + deleteWeight).
            row at:col put: min
        ]
    ].

    ^ (d at:(len1 + 1)) at:(len2 + 1)

    "'ocmprt' levenshteinTo:'computer'
     'computer' levenshteinTo:'computer'
     'ocmputer' levenshteinTo:'computer'
     'cmputer' levenshteinTo:'computer'
     'Computer' levenshteinTo:'computer'"
! !

!String methodsFor:'copying'!

shallowCopy
    "return a copy of the receiver
     - redefined for more speed"

    ^ self copyFrom:1
!

deepCopy
    "return a copy of the receiver
     - redefined for speed"

    ^ self copyFrom:1
!

, aString
    "return the concatenation of myself and the argument, aString
     - reimplemented here for speed"

    |newString|
%{
    int l1, l2;
    char *cp1, *cp2;
    REGISTER unsigned char *dstp;
    REGISTER OBJ s = aString;
    OBJ new();

    if ((_qClass(s) == String) || (_qClass(s) == Symbol) || (_qClass(s) == _qClass(self))) {
        cp1 = (char *) _stringVal(self);
        l1 = _stringSize(self);
        if (_qClass(self) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

            cp1 += n;
            l1 -= n;
        }

        cp2 = (char *) _stringVal(s);
        l2 = _stringSize(s);
        if (_qClass(s) != String) {
            int n = _intVal(_ClassInstPtr(_qClass(s))->c_ninstvars) * sizeof(OBJ);

            cp2 += n;
            l2 -= n;
        }

        _qNew(newString, OHDR_SIZE + l1 + l2 + 1, __context);
        _InstPtr(newString)->o_class = String;
        dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
        bcopy(cp1, dstp, l1);
        bcopy(cp2, dstp + l1, l2+1);
#else
# ifdef FAST_STRCPY
        strcpy(dstp, cp1);
        strcpy(dstp + l1, cp2);
# else
        while ((*dstp++ = *cp1++) != '\0') ;
        dstp--;
        while ((*dstp++ = *cp2++) != '\0') ;
# endif
#endif
        RETURN ( newString );
    }
%}
.
    ^ super , aString
!

concatenate:string1 and:string2
    "return the concatenation of myself and the arguments, string1 and string2.
     This is equivalent to self , string1 , string2
     - generated by compiler when such a construct is detected"

    |newString|
%{
    int len1, len2, len3;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    OBJ new();

    if ((_isString(self) || _isSymbol(self))
     && (_isString(string1) || _isSymbol(string1))
     && (_isString(string2) || _isSymbol(string2))) {
        len1 = _stringSize(self);
        len2 = _stringSize(string1);
        len3 = _stringSize(string2);
        _qNew(newString, OHDR_SIZE + len1 + len2 + len3 + 1, __context);
        _InstPtr(newString)->o_class = String;
        dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
        bcopy(_stringVal(self), dstp, len1);
        bcopy(_stringVal(string1), dstp + len1, len2);
        bcopy(_stringVal(string2), dstp + len1 + len2, len3+1);
#else
# ifdef FAST_STRCPY
        strcpy(dstp, _stringVal(self));
        strcpy(dstp + len1, _stringVal(string1));
        strcpy(dstp + len1 + len2, _stringVal(string2));
# else
        srcp = _stringVal(self);
        while ((*dstp++ = *srcp++) != '\0') ;
        dstp--;
        srcp = _stringVal(string1);
        while ((*dstp++ = *srcp++) != '\0') ;
        dstp--;
        srcp = _stringVal(string2);
        while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
        RETURN ( newString );
    }
%}
.
    ^ super , string1 , string2
!

concatenate:string1 and:string2 and:string3
    "return the concatenation of myself and the string arguments.
     This is equivalent to self , string1 , string2 , string3
     - generated by compiler when such a construct is detected"

    |newString|
%{
    int len1, len2, len3, len4;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    OBJ new();

    if ((_isString(self) || _isSymbol(self))
     && (_isString(string1) || _isSymbol(string1))
     && (_isString(string2) || _isSymbol(string2))
     && (_isString(string3) || _isSymbol(string3))) {
        len1 = _stringSize(self);
        len2 = _stringSize(string1);
        len3 = _stringSize(string2);
        len4 = _stringSize(string3);
        _qNew(newString, OHDR_SIZE + len1 + len2 + len3 + len4 + 1, __context);
        _InstPtr(newString)->o_class = String;
        dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
        bcopy(_stringVal(self), dstp, len1);
        bcopy(_stringVal(string1), dstp + len1, len2);
        bcopy(_stringVal(string2), dstp + len1 + len2, len3);
        bcopy(_stringVal(string3), dstp + len1 + len2 + len3, len4+1);
#else
# ifdef FAST_STRCPY
        strcpy(dstp, _stringVal(self));
        strcpy(dstp + len1, _stringVal(string1));
        strcpy(dstp + len1 + len2, _stringVal(string2));
        strcpy(dstp + len1 + len2 + len3, _stringVal(string3));
# else
        srcp = _stringVal(self);
        while ((*dstp++ = *srcp++) != '\0') ;
        dstp--;
        srcp = _stringVal(string1);
        while ((*dstp++ = *srcp++) != '\0') ;
        dstp--;
        srcp = _stringVal(string2);
        while ((*dstp++ = *srcp++) != '\0') ;
        dstp--;
        srcp = _stringVal(string3);
        while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
        RETURN ( newString );
    }
%}
.
    ^ super , string1 , string2 , string3
!

copyWith:aCharacter
    "return the concatenation of myself and the argument, aCharacter
     - reimplemented here for speed"

    |newString|

    (aCharacter isMemberOf:Character) ifFalse:[
        ^ super copyWith:aCharacter
    ].
%{
    OBJ new();
    int sz;
    REGISTER unsigned char *dstp;
    int offs;

    sz = _qSize(self) + 1;
    if (_qClass(self) != String) {
        offs = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        sz -= offs;
    } else
        offs = 0;

    _qNew(newString, sz, __context);
    _InstPtr(newString)->o_class = String;
    dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
    sz = sz - OHDR_SIZE - 1 - 1;
    bcopy(_stringVal(self) + offs, dstp, sz);
    dstp += sz;
#else
# ifdef FAST_STRCPY
    strcpy(dstp, _stringVal(self) + offs);
    dstp += sz - OHDR_SIZE - 1 - 1;
# else
    {
        REGISTER unsigned char *srcp;

        srcp = _stringVal(self) + offs;
        while ((*dstp = *srcp++) != '\0')
            dstp++;
    }
# endif
#endif
    *dstp++ = _intVal(_characterVal(aCharacter));
    *dstp = '\0';
%}
.
    ^ newString
!

copyFrom:start to:stop
    "return the substring starting at index start, anInteger and ending
     at stop, anInteger.
     - reimplemented here for speed"

    |newString|
%{
    OBJ new();
#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1, index2;

    if (_isSmallInteger(start) && _isSmallInteger(stop)) {
        len = _stringSize(self);
        index1 = _intVal(start);
        index2 = _intVal(stop);

        if ((index1 <= index2) && (index1 > 0)) {
            if (_qClass(self) != String) {
                int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

                index1 += n;
                index2 += n;
            }
            if (index2 <= len) {
                count = index2 - index1 + 1;
                _qNew(newString, OHDR_SIZE+count+1, __context);
                _InstPtr(newString)->o_class = String;
                dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
                bcopy(_stringVal(self) + index1 - 1, dstp, count);
                dstp[count] = '\0';
#else
                srcp = _stringVal(self) + index1 - 1;
                while (count--) {
                    *dstp++ = *srcp++;
                }
                *dstp = '\0';
#endif
                RETURN ( newString );
            }
        }
    }
%}
.
    ^ super copyFrom:start to:stop
!

copyFrom:start
    "return the substring from start, anInteger to the end
     - reimplemented here for speed"

    |newString|
%{
    OBJ new();
#if !defined(FAST_MEMCPY)
    REGISTER unsigned char *srcp;
#endif
    REGISTER unsigned char *dstp;
    REGISTER int count;
    int len, index1;

    if (_isSmallInteger(start)) {
        len = _stringSize(self);
        index1 = _intVal(start);
        if (index1 > 0) {
            if (_qClass(self) != String) {
                int n = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);

                index1 += n;
            }
            if (index1 <= len) {
                count = len - index1 + 1;
                _qNew(newString, OHDR_SIZE+count+1, __context);
                _InstPtr(newString)->o_class = String;
                dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
                bcopy(_stringVal(self) + index1 - 1, dstp, count);
                dstp[count] = '\0';
#else
                srcp = _stringVal(self) + index1 - 1;
                while (count--) {
                    *dstp++ = *srcp++;
                }
                *dstp = '\0';
#endif
                RETURN ( newString );
            }
        }
    }
%}
.
    ^ super copyFrom:start
! !

!String methodsFor:'filling and replacing'!

replaceFrom:start to:stop with:aString startingAt:repStart
    "replace the characters starting at index start, anInteger and ending
     at stop, anInteger with characters from aString starting at repStart.

     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp, *dstp;
    REGISTER int count;
    int len, index1, index2;
    int repLen, repIndex;

    if ((_isString(aString) || _isSymbol(aString))
     && _isString(self)
     && _isSmallInteger(start)
     && _isSmallInteger(stop)) {
        len = _stringSize(self);
        index1 = _intVal(start);
        index2 = _intVal(stop);
        count = index2 - index1 + 1;
        if (count <= 0) {
             RETURN (self);
        }
        if ((index2 <= len) && (index1 > 0)) {
            repLen = _stringSize(aString);
            repIndex = _intVal(repStart);
            if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
                srcp = _stringVal(aString) + repIndex - 1;
                dstp = _stringVal(self) + index1 - 1;
                if (aString == self) {
                    /* take care of overlapping copy */
                    if (srcp < dstp) {
                        /* must do a reverse copy */
                        srcp += count;
                        dstp += count;
                        while (count-- > 0) {
                            *--dstp = *--srcp;
                        }
                        RETURN (self);
                    }
                }
#ifdef FAST_MEMCPY
                bcopy(srcp, dstp, count);
#else
                while (count-- > 0) {
                    *dstp++ = *srcp++;
                }
#endif
                RETURN (self);
            }
        }
    }
%}
.
    ^ super replaceFrom:start to:stop with:aString startingAt:repStart
!

replaceAll:oldCharacter by:newCharacter
    "replace all oldCharacters by newCharacter in the receiver"

%{  /* NOCONTEXT */

    REGISTER unsigned char *srcp;
    REGISTER unsigned oldVal, newVal;

    if (_isCharacter(oldCharacter)
     && _isCharacter(newCharacter)
     && _isString(self)) {
        srcp = (unsigned char *)_stringVal(self);
        oldVal = _intVal(_characterVal(oldCharacter));
        newVal = _intVal(_characterVal(newCharacter));
        while (*srcp) {
            if (*srcp == oldVal)
                *srcp = newVal;
            srcp++;
        }
        RETURN ( self );
    }
%}
.
    ^ super replaceAll:oldCharacter by:newCharacter
!

reverse                                                                         
    "in-place reverse the characters of the string"

%{  /* NOCONTEXT */

    REGISTER char c;
    REGISTER unsigned char *hip, *lowp;

    if (_isString(self)) {
        lowp = _stringVal(self);
        hip = lowp + _stringSize(self) - 1;
        while (lowp < hip) {
            c = *lowp;
            *lowp = *hip;
            *hip = c;
            lowp++;
            hip--;
        }
        RETURN ( self );
    }
%}
.
    ^ super reverse
!

withCRs
    "return a copy of the receiver, where
     all \-characters are replaced by newline characters
     - reimplemented here for speed"

    |newString|
%{
    OBJ new();
    REGISTER char c;
    REGISTER unsigned char *srcp, *dstp;
    int len, offs;

    len = _qSize(self);
    if (_qClass(self) != String) {
        offs = _intVal(_ClassInstPtr(_qClass(self))->c_ninstvars) * sizeof(OBJ);
        len -= offs;
    } else
        offs = 0;

    _qNew(newString, len, __context);
    _InstPtr(newString)->o_class = String;
    srcp = _stringVal(self) + offs;
    dstp = _stringVal(newString);
    while (c = *srcp++)
        if (c == '\\')
            *dstp++ = '\n';
        else
            *dstp++ = c;
    *dstp++ = '\0';
    RETURN ( newString );
%}
!

atAllPut:aCharacter
    "replace all characters with aCharacter
     - reimplemented here for speed"

%{  /* NOCONTEXT */

    REGISTER int  byteValue;
#ifndef FAST_MEMSET
    REGISTER unsigned char *dst;
#endif

    if (_isCharacter(aCharacter) && _isString(self)) {
        byteValue = _intVal(_characterVal(aCharacter));
#ifdef FAST_MEMSET
        memset(_stringVal(self), byteValue, _qSize(self) - OHDR_SIZE - 1);
#else
        dst = _stringVal(self);
        while (*dst != '\0')
            *dst++ = byteValue;
#endif
        RETURN ( self );
    }
%}
.
    ^ super atAllPut:aCharacter
!

withoutSpaces
    "return a copy of myself without leading and trailing spaces"

    |startIndex endIndex blank|

    startIndex := 1.
    endIndex := self size.
    blank := Character space.
    [(startIndex < endIndex) and:[(self at:startIndex) == blank]] whileTrue:[
        startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) == blank]] whileTrue:[
        endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
        ^ ''
    ].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[
        ^ self
    ].
    ^ self copyFrom:startIndex to:endIndex
!

withoutSeparators
    "return a copy of myself without leading and trailing whitespace"

    |startIndex endIndex|

    startIndex := 1.
    endIndex := self size.
    [(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
        startIndex := startIndex + 1
    ].
    [(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
        endIndex := endIndex - 1
    ].
    startIndex > endIndex ifTrue:[
        ^ ''
    ].
    ((startIndex == 1) and:[endIndex == self size]) ifTrue:[
        ^ self
    ].
    ^ self copyFrom:startIndex to:endIndex
! !

!String methodsFor:'queries'!

encoding
    "assume iso8859 encoding"

    ^ #iso8859
!

knownAsSymbol
    "return true, if there is a symbol with same characters in the
     system - use to check for existance of a symbol without creating one"

%{  /* NOCONTEXT */
    extern OBJ _KNOWNASSYMBOL();

    RETURN ( _KNOWNASSYMBOL(_stringVal(self)) );
%}
! !