"
COPYRIGHT (c) 1988 by Claus Gittinger
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
AbstractString subclass:#String
instanceVariableNames:''
classVariableNames:''
poolDictionaries:''
category:'Collections-Text'
!
String comment:'
COPYRIGHT (c) 1988 by Claus Gittinger
All Rights Reserved
$Header: /cvs/stx/stx/libbasic/String.st,v 1.18 1994-10-10 00:28:43 claus Exp $
'!
!String class methodsFor:'documentation'!
copyright
"
COPYRIGHT (c) 1988 by Claus Gittinger
All Rights Reserved
This software is furnished under a license and may be used
only in accordance with the terms of that license and with the
inclusion of the above copyright notice. This software may not
be provided or otherwise made available to, or used by, any
other person. No title to or ownership of the software is
hereby transferred.
"
!
version
"
$Header: /cvs/stx/stx/libbasic/String.st,v 1.18 1994-10-10 00:28:43 claus Exp $
"
!
documentation
"
Strings are ByteArrays storing Characters.
Strings are kind of kludgy: to allow for easy handling by c-functions,
there is always one 0-byte added at the end, which is not counted
in size. also, the at:put: method does not allow for storing 0-bytes.
(to do this, the basicAt:put: and basicNew: methods are redefined)
You cannot add any instvars to String, since the the run time system & compiler
creates literal strings and knows that strings have no named instvars.
If you really need strings with instVars, you have to create a subclass
of String (the access functions defined here can handle this).
A little warning though: not all smalltalk systems allow subclassing String,
so your program may become unportable if you do so.
"
! !
%{
#include <stdio.h>
#include <ctype.h>
/*
* old st/x creates strings with spaces in it;
* new st/x will fill it with zeros (for st-80 compatibility)
* the define below sets old behavior.
*/
#define INITIALIZE_WITH_SPACE
%}
!String class methodsFor:'queries'!
isBuiltInClass
"this class is known by the run-time-system"
^ self == String
! !
!String class methodsFor:'instance creation'!
basicNew:anInteger
"return a new empty string with anInteger characters"
%{ /* NOCONTEXT */
OBJ newString;
REGISTER int len;
REGISTER unsigned char *cp;
REGISTER OBJ *op;
int nInstVars, instsize;
extern OBJ new();
if (_isSmallInteger(anInteger)) {
len = _intVal(anInteger);
if (len >= 0) {
if (self == String) {
instsize = OHDR_SIZE + len + 1;
if (_CanDoQuickNew(instsize)) {
/*
* the most common case
*/
_qCheckedNew(newString, instsize);
_InstPtr(newString)->o_class = self;
cp = _stringVal(newString);
#ifdef FAST_MEMSET
memset(cp, ' ', len);
*(cp + len) = '\0';
#else
while (len >= 8) {
cp[0] = cp[1] = cp[2] = cp[3] = ' ';
cp[4] = cp[5] = cp[6] = cp[7] = ' ';
cp += 8;
len -= 8;
}
while (len--)
*cp++ = ' ';
*cp = '\0';
#endif
RETURN (newString);
}
nInstVars = 0;
} else {
nInstVars = _intVal(_ClassInstPtr(self)->c_ninstvars);
instsize = OHDR_SIZE + __OBJS2BYTES__(nInstVars) + len + 1;
}
PROTECT_CONTEXT
_qNew(newString, instsize, SENDER);
UNPROTECT_CONTEXT
if (newString == nil) goto fail;
_InstPtr(newString)->o_class = self;
if (nInstVars) {
#if defined(memset4)
memset4(_InstPtr(newString)->i_instvars, nil, nInstVars);
#else
# if defined(FAST_MEMSET) && ! defined(NEGATIVE_ADDRESSES)
/*
* knowing that nil is 0
*/
memset(_InstPtr(newString)->i_instvars, 0, __OBJS2BYTES__(nInstVars));
# else
op = _InstPtr(newString)->i_instvars;
do {
*op++ = nil;
} while (--nInstVars);
# endif
#endif
cp = _stringVal(newString) + __OBJS2BYTES__(nInstVars);
} else {
cp = _stringVal(newString);
}
#ifdef FAST_MEMSET
memset(cp, ' ', len);
*(cp + len) = '\0';
#else
while (len >= 8) {
cp[0] = cp[1] = cp[2] = cp[3] = ' ';
cp[4] = cp[5] = cp[6] = cp[7] = ' ';
cp += 8;
len -= 8;
}
while (len--)
*cp++ = ' ';
*cp = '\0';
#endif
RETURN (newString);
}
}
fail: ;;
%}
.
"
invalid argument, or out-of-memory:
use error handling in superclass
"
^ (super basicNew:anInteger) atAllPut:(Character space)
!
unititializedNew:anInteger
"redefine it back - strings must have a 0-byte at the end"
^ self basicNew:anInteger
! !
!String methodsFor:'accessing'!
basicSize
"return the number of characters in myself.
Redefined here to exclude the 0-byte at the end."
%{ /* NOCONTEXT */
OBJ cls;
cls = _qClass(self);
if ((cls == String) || (cls == Symbol)) {
RETURN ( _MKSMALLINT(_stringSize(self)) );
}
%}
.
^ super basicSize - 1
!
basicAt:index
"return the character at position index, an Integer
- reimplemented here since we return characters"
%{ /* NOCONTEXT */
REGISTER int indx;
int len;
OBJ cls;
if (_isSmallInteger(index)) {
indx = _intVal(index);
cls = _qClass(self);
if (cls != String)
indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len = _stringSize(self);
if ((indx > 0) && (indx <= len)) {
RETURN ( _MKCHARACTER(_stringVal(self)[indx-1] & 0xFF) );
}
}
%}.
^ self subscriptBoundsError:index
!
basicAt:index put:aCharacter
"store the argument, aCharacter at position index, an Integer
- reimplemented here since we store characters"
%{ /* NOCONTEXT */
REGISTER int value, indx;
int len;
OBJ cls;
if (__isCharacter(aCharacter)) {
value = _intVal(_characterVal(aCharacter));
if ((value > 0)
&& (value <= 255)
&& _isSmallInteger(index)) {
indx = _intVal(index);
cls = _qClass(self);
if (cls != String)
indx += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len = _stringSize(self);
if ((indx > 0) && (indx <= len)) {
_stringVal(self)[indx-1] = value;
RETURN ( aCharacter );
}
}
}
%}.
(aCharacter isMemberOf:Character) ifFalse:[
^ self elementNotCharacter
] ifTrue:[
(aCharacter asciiValue between:1 and:255) ifFalse:[
^ self elementBoundsError
] ifTrue:[
^ self subscriptBoundsError:index
]
]
! !
!String methodsFor:'converting'!
asSymbol
"return a unique symbol with name taken from myself.
The argument must be a String, subclass instances are not allowed."
%{
if (_qClass(self) == String) {
RETURN ( _MKSYMBOL(_stringVal(self), (OBJ *)0, __context) );
}
%}
.
self primitiveFailed
! !
!String class methodsFor:'binary storage'!
binaryDefinitionFrom: stream manager: manager
^ (stream next: (stream nextNumber: 4)) asString
! !
!String methodsFor:'printing & storing'!
isLiteral
"return true, if the receiver can be used as a literal
(i.e. can be used in constant arrays)"
^ true
!
print
"print the receiver on standard output.
This method does NOT use the stream classes and will therefore work
even in case of emergency."
%{ /* NOCONTEXT */
if (_qClass(self) == String) {
printf("%s", _stringVal(self));
RETURN (self);
}
%}
.
^ super print
!
printfPrintString:formatString
"non-portable but sometimes useful.
Return a printed representation of the receiver as specified by formatString,
which is defined by printf.
No checking on buffer overrun is done.
This method is NONSTANDARD and may be removed without notice."
%{ /* STACK: 1000 */
char buffer[800];
char *cp;
if (__isString(formatString)) {
/*
* actually only needed on sparc: since thisContext is
* in a global register, which gets destroyed by printf,
* manually save it here - very stupid ...
*/
OBJ sav = __thisContext;
cp = (char *)_stringVal(self);
if (_qClass(self) != String)
cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
sprintf(buffer, (char *)_stringVal(formatString), cp);
__thisContext = sav;
RETURN ( _MKSTRING(buffer COMMA_SND) );
}
%}
.
self primitiveFailed
"'hello' printfPrintString:'%%s -> %s'"
"'hello' printfPrintString:'%%10s -> %10s'"
"'hello' printfPrintString:'%%-10s -> %-10s'"
!
storeString
"return a String for storing myself"
|s n index|
n := self occurrencesOf:$'.
n == 0 ifFalse:[
s := String new:(n + 2 + self size).
s at:1 put:$'.
index := 2.
self do:[:thisChar |
(thisChar == $') ifTrue:[
s at:index put:thisChar.
index := index + 1.
].
s at:index put:thisChar.
index := index + 1.
].
s at:index put:$'.
^ s
].
^ '''' , self , ''''
!
storeOn:aStream
"put the storeString of myself on aStream"
aStream nextPut: $'.
(self includes:$') ifTrue:[
self do:[:thisChar |
(thisChar == $') ifTrue:[aStream nextPut:thisChar].
aStream nextPut:thisChar
]
] ifFalse:[
aStream nextPutAll:self
].
aStream nextPut:$'
! !
!String methodsFor:'comparing'!
> aString
"Compare the receiver with the argument and return true if the
receiver is greater than the argument. Otherwise return false.
No national variants are honred; use after: for this.
In contrast to ST-80, case differences are NOT ignored, thus
'foo' > 'Foo' will return true.
This may change."
%{ /* NOCONTEXT */
int len1, len2, cmp;
REGISTER OBJ s = aString;
char *cp1, *cp2;
OBJ cls;
if (_isNonNilObject(s)
&& (((cls = _qClass(s)) == String) || (cls == Symbol) || (cls == _qClass(self)))) {
cp1 = (char *) _stringVal(self);
len1 = _stringSize(self);
/*
* care for instances of subclasses ...
*/
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
cp1 += n;
len1 -= n;
}
cp2 = (char *) _stringVal(s);
len2 = _stringSize(s);
/*
* care for instances of subclasses ...
*/
if (cls != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
cp2 += n;
len2 -= n;
}
if (len1 <= len2)
cmp = strncmp(cp1, cp2, len1);
else
cmp = strncmp(cp1, cp2, len2);
if (cmp > 0) {
RETURN ( true );
}
if ((cmp == 0) && (len1 > len2)) {
RETURN ( true );
}
RETURN ( false );
}
%}
.
^ super > aString
!
= aString
"Compare the receiver with the argument and return true if the
receiver is equal to the argument. Otherwise return false.
This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo')"
%{ /* NOCONTEXT */
int l1, l2;
REGISTER OBJ s = aString;
char *cp1, *cp2;
OBJ cls;
if (s == self) {
RETURN ( true );
}
if (! _isNonNilObject(s)) {
RETURN ( false );
}
if (((cls = _qClass(s)) == String) || (cls == Symbol) || (cls == _qClass(self))) {
cp1 = (char *) _stringVal(self);
l1 = _stringSize(self);
/*
* care for instances of subclasses ...
*/
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
cp1 += n;
l1 -= n;
}
cp2 = (char *) _stringVal(s);
l2 = _stringSize(s);
/*
* care for instances of subclasses ...
*/
if (cls != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
cp2 += n;
l2 -= n;
}
if (l1 != l2) {
RETURN ( false );
}
RETURN ( (strncmp(cp1, cp2, l1) == 0) ? true : false );
}
%}
.
^ super = aString
!
~= aString
"Compare the receiver with the argument and return true if the
receiver is not equal to the argument. Otherwise return false.
This compare is case-sensitive (i.e. 'Foo' is NOT = 'foo')"
%{ /* NOCONTEXT */
int l1, l2;
REGISTER OBJ s = aString;
char *cp1, *cp2;
OBJ cls;
if (s == self) {
RETURN ( false );
}
if (! _isNonNilObject(s)) {
RETURN ( true );
}
if (((cls = _qClass(s)) == String) || (cls == Symbol) || (cls == _qClass(self))) {
cp1 = (char *) _stringVal(self);
l1 = _stringSize(self);
/*
* care for instances of subclasses ...
*/
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
cp1 += n;
l1 -= n;
}
cp2 = (char *) _stringVal(s);
l2 = _stringSize(s);
/*
* care for instances of subclasses ...
*/
if (cls != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
cp2 += n;
l2 -= n;
}
if (l1 != l2) {
RETURN ( true );
}
RETURN ( (strncmp(cp1, cp2, l1) == 0) ? false : true );
}
%}
.
^ super ~= aString
!
after:aString
"Compare the receiver with the argument and return true if the
receiver should come after the argument in a sorted list.
Otherwise return false.
The comparison is language specific, depending on the value of
LC_COLLATE, which is initialized from the environment."
%{ /* NOCONTEXT */
int cmp;
REGISTER OBJ s = aString;
char *cp1, *cp2;
OBJ cls;
if (_isNonNilObject(s)
&& (((cls = _qClass(s)) == String) || (cls == Symbol) || (cls == _qClass(self)))) {
cp1 = (char *) _stringVal(self);
/*
* care for instances of subclasses ...
*/
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
cp1 += n;
}
cp2 = (char *) _stringVal(s);
/*
* care for instances of subclasses ...
*/
if (cls != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
cp2 += n;
}
#ifdef HAS_STRCOLL
cmp = strcoll(cp1, cp2);
#else
cmp = strcmp(cp1, cp2);
#endif
if (cmp > 0) {
RETURN ( true );
}
RETURN ( false );
}
%}.
"
currently, this operation is only defined for strings, symbols
and subclasses.
"
self primitiveFailed
! !
!String methodsFor:'character searching'!
occurrencesOf:aCharacter
"count the occurrences of the argument, aCharacter in myself
- reimplemented here for speed"
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
REGISTER int byteValue;
REGISTER int count;
OBJ cls;
if (__isCharacter(aCharacter)) {
count = 0;
byteValue = _intVal(_characterVal(aCharacter));
cp = _stringVal(self);
if ((cls = _qClass(self)) != String)
cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
while (*cp) {
if (*cp++ == byteValue) count++;
}
RETURN ( _MKSMALLINT(count) );
}
%}
.
^ 0
"
'hello world' occurrencesOf:$a
'hello world' occurrencesOf:$w
'hello world' occurrencesOf:$l
'hello world' occurrencesOf:$x
'hello world' occurrencesOf:1
"
!
includes:aCharacter
"return true if the argument, aCharacter is included in the receiver
- reimplemented here for speed"
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
REGISTER int byteValue;
extern char *strchr();
OBJ cls;
if (__isCharacter(aCharacter)) {
byteValue = _intVal(_characterVal(aCharacter));
cp = _stringVal(self);
if ((cls = _qClass(self)) != String)
cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
if (cp) {
RETURN ( true );
}
#else
while (*cp) {
if (*cp == byteValue) {
RETURN ( true );
}
cp++;
}
#endif
}
%}
.
^ false
"
'hello world' includes:$a
'hello world' includes:$o
'hello world' includes:$x
'hello world' includes:1
"
!
includesAny:aCollection
"return true, if the receiver includes any of the characters in the
argument, aCollection.
- redefined for speed if the argument is a String."
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
REGISTER unsigned char *matchP;
#ifdef SYSV
# define INDEX strchr
#else
# define INDEX index
#endif
extern char *INDEX();
OBJ cls;
if (__isString(aCollection)) {
matchP = _stringVal(aCollection);
cp = _stringVal(self);
if ((cls = _qClass(self)) != String)
cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
while (*cp) {
if (INDEX(matchP, *cp)) {
RETURN ( true );
}
cp++;
}
RETURN ( false );
}
%}
.
^ super includesAny:aCollection
"
'hello world' includesAny:'abcd'
'hello world' includesAny:'xyz'
'hello world' includesAny:(Array with:$a with:$b with:$d)
'hello world' includesAny:(Array with:$x with:$y)
'hello world' includesAny:(Array with:1 with:2)
"
!
indexOf:aCharacter
"return the index of the first occurrences of the argument, aCharacter
in the receiver or 0 if not found - reimplemented here for speed."
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
#ifdef FAST_STRCHR
char *strchr();
#else
REGISTER int byteValue;
REGISTER int index;
#endif
OBJ cls;
if (__isCharacter(aCharacter)) {
cp = _stringVal(self);
if ((cls = _qClass(self)) != String)
cp += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
#ifdef FAST_STRCHR
cp = (unsigned char *) strchr(cp, _intVal(_characterVal(aCharacter)));
if (cp) {
RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
}
#else
byteValue = _intVal(_characterVal(aCharacter));
index = 1;
while (*cp) {
if (*cp++ == byteValue) {
RETURN ( _MKSMALLINT(index) );
}
index++;
}
#endif
}
%}
.
^ 0
"
'hello world' indexOf:(Character space)
'hello world' indexOf:$A
"
!
indexOf:aCharacter startingAt:start
"return the index of the first occurrence of the argument, aCharacter
in myself starting at start, anInteger or 0 if not found;
- reimplemented here for speed"
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
REGISTER int index, byteValue;
#ifdef FAST_STRCHR
char *strchr();
#endif
int len;
OBJ cls;
if (_isSmallInteger(start)) {
if (__isCharacter(aCharacter)) {
byteValue = _intVal(_characterVal(aCharacter));
index = _intVal(start);
if (index <= 0)
index = 1;
if ((cls = _qClass(self)) != String)
index += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len = _stringSize(self);
if (index <= len) {
cp = _stringVal(self) + index - 1;
#ifdef FAST_STRCHR
cp = (unsigned char *) strchr(cp, byteValue);
if (cp) {
RETURN ( _MKSMALLINT(cp - _stringVal(self) + 1) );
}
#else
while (*cp) {
if (*cp++ == byteValue) {
RETURN ( _MKSMALLINT(index) );
}
index++;
}
#endif
}
}
RETURN ( _MKSMALLINT(0) );
}
%}
.
^ super indexOf:aCharacter startingAt:start
"
'hello world' indexOf:$l startingAt:1
'hello world' indexOf:$l startingAt:5
"
!
indexOfSeparatorStartingAt:start
"return the index of the next separator character"
%{ /* NOCONTEXT */
REGISTER unsigned char *cp;
REGISTER char c;
int len, index;
OBJ cls;
index = _intVal(start);
if (index <= 0) {
index = 1;
}
if ((cls = _qClass(self)) != String)
index += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len = _stringSize(self);
if (index > len) {
RETURN ( _MKSMALLINT(0) );
}
cp = _stringVal(self) + index - 1;
while (c = *cp++) {
#ifdef ASCII
if (c <= ' ')
#endif
if ((c == ' ') || (c == '\t') || (c == '\n')
|| (c == '\r') || (c == '\f')) {
RETURN ( _MKSMALLINT(cp - _stringVal(self)) );
}
}
%}
.
^ 0
"
'hello world' indexOfSeparatorStartingAt:3
'hello world' indexOfSeparatorStartingAt:7
"
! !
!String methodsFor:'pattern matching'!
startsWith:aString
"return true, if the receiver starts with something, aString."
aString isString ifFalse: [
(aString isMemberOf:Character) ifTrue:[
self isEmpty ifTrue:[^ false].
^ (self at:1) == aString
].
^ super startsWith:aString
].
%{
int len1, len2;
REGISTER unsigned char *src1, *src2;
REGISTER OBJ s = aString;
OBJ cls;
len1 = _qSize(self);
src1 = _stringVal(self);
if ((cls = _qClass(self)) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len1 -= n;
src1 += n;
}
len2 = _qSize(s);
src2 = _stringVal(s);
if ((cls = _qClass(s)) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len2 -= n;
src2 += n;
}
if (len1 < len2) {
RETURN ( false );
}
while (*src2)
if (*src2++ != *src1++) {
RETURN ( false );
}
%}
.
^ true
"
'hello world' startsWith:'hello'
'hello world' startsWith:'hi'
"
!
endsWith:aString
"return true, if the receiver end with something, aString."
aString isString ifFalse: [
(aString isMemberOf:Character) ifTrue:[
self isEmpty ifTrue:[^ false].
^ (self at:(self size)) == aString
].
^ super endsWith:aString
].
%{
int len1, len2;
REGISTER unsigned char *src1, *src2;
REGISTER OBJ s = aString;
OBJ cls;
len1 = _qSize(self);
src1 = _stringVal(self);
if ((cls = _qClass(self)) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len1 -= n;
src1 += n;
}
len2 = _qSize(s);
src2 = _stringVal(s);
if ((cls = _qClass(s)) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
len2 -= n;
src2 += n;
}
if (len1 < len2) {
RETURN ( false );
}
src1 = _stringVal(self) + len1 - len2;
src2 = _stringVal(aString);
while (*src2)
if (*src2++ != *src1++) {
RETURN ( false );
}
%}
.
^ true
"
'hello world' endsWith:'world'
'hello world' endsWith:'earth'
"
! !
!String methodsFor:'testing'!
isBlank
"return true, if the receiver contains spaces only"
%{ /* NOCONTEXT */
REGISTER unsigned char *src;
OBJ cls;
src = _stringVal(self);
if ((cls = _qClass(self)) != String)
src += __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
while (*src)
if (*src++ != ' ') {
RETURN ( false );
}
%}
.
^ true
!
levenshteinTo:aString s:substWeight c:caseWeight i:insrtWeight d:deleteWeight
"parametrized levenshtein. arguments are the costs for
substitution, case-change, insertion and deletion of a character."
%{ /* STACK: 4000 */
/*
* this is very heavy used when correcting errors
* (all symbols are searched for best match) - therefore it must be fast
*/
{
unsigned short *data;
int l1, l2;
REGISTER int sz;
unsigned char *s1, *s2;
int v1, v2, v3, m;
REGISTER unsigned short *dp;
REGISTER int delta;
REGISTER int j;
int i;
int iW, cW, sW, dW;
# define FASTSIZE 30
unsigned short fastData[(FASTSIZE + 1) * (FASTSIZE + 1)];
if ((__isString(self) || __isSymbol(self))
&& (__isString(aString) || __isSymbol(aString))
&& _isSmallInteger(insrtWeight) && _isSmallInteger(caseWeight)
&& _isSmallInteger(substWeight) && _isSmallInteger(deleteWeight)) {
iW = _intVal(insrtWeight);
cW = _intVal(caseWeight);
sW = _intVal(substWeight);
dW = _intVal(deleteWeight);
s1 = _stringVal(self);
s2 = _stringVal(aString);
l1 = strlen(s1);
l2 = strlen(s2);
sz = (l1 < l2) ? l2 : l1;
delta = sz + 1;
if (sz <= FASTSIZE) {
data = fastData;
} else {
/* add ifdef ALLOCA here ... */
data = (unsigned short *)malloc(delta * delta * sizeof(short));
}
data[0] = 0;
dp = data+1;
for (j=1, dp=data+1; j<=sz; j++, dp++)
*dp = *(dp-1) + iW;
for (i=1, dp=data+delta; i<=sz; i++, dp+=delta)
*dp = *(dp-delta) + dW;
for (i=1; i<=l1; i++) {
for (j=1; j<=l2; j++) {
dp = data + (i*delta) + j;
if (s1[i] != s2[j]) {
if (tolower(s1[i]) == tolower(s2[j])) {
m = cW;
} else {
m = sW;
}
} else
m = 0;
v2 = *(dp - 1) + iW;
v3 = *(dp - delta) + dW;
v1 = *(dp - delta - 1) + m;
if (v1 < v2)
if (v1 < v3)
m = v1;
else
m = v3;
else
if (v2 < v3)
m = v2;
else
m = v3;
*dp = m;
}
}
m = data[l1 * delta + l2];
if (sz > FASTSIZE)
free(data);
RETURN ( _MKSMALLINT(m) );
}
}
%}
.
^ super levenshteinTo:aString
s:substWeight c:caseWeight
i:insrtWeight d:deleteWeight
"'ocmprt' levenshteinTo:'computer'
'computer' levenshteinTo:'computer'
'ocmputer' levenshteinTo:'computer'
'cmputer' levenshteinTo:'computer'
'Computer' levenshteinTo:'computer'"
! !
!String methodsFor:'copying'!
shallowCopy
"return a copy of the receiver"
(self isMemberOf:String) ifTrue:[
^ self copyFrom:1
].
^ super shallowCopy
!
deepCopyUsing:aDictionary
"return a copy of the receiver - reimplemented to be a bit faster"
"
could be an instance of a subclass which needs deepCopy
of its named instvars ...
"
(self isMemberOf:String) ifTrue:[
^ self copyFrom:1
].
^ super deepCopy
!
simpleDeepCopy
"return a copy of the receiver"
"
could be an instance of a subclass which needs deepCopy
of its named instvars ...
"
(self isMemberOf:String) ifTrue:[
^ self copyFrom:1
].
^ super deepCopy
!
deepCopy
"return a copy of the receiver"
"
could be an instance of a subclass which needs deepCopy
of its named instvars ...
"
(self isMemberOf:String) ifTrue:[
^ self copyFrom:1
].
^ super deepCopy
!
, aString
"return the concatenation of myself and the argument, aString as
a String.
- reimplemented here for speed"
%{
int l1, l2, sz;
char *cp1, *cp2;
REGISTER unsigned char *dstp;
REGISTER OBJ s = aString;
REGISTER OBJ _String = String;
OBJ myClass, argClass, newString;
OBJ new();
if (_isNonNilObject(s)) {
myClass = _qClass(self);
argClass = _qClass(s);
/*
* either same class or both Strings/Symbols:
* can do it here
*/
if ((myClass == argClass)
|| (((myClass == _String) || (myClass == Symbol))
&& ((argClass == _String) || (argClass == Symbol)))) {
cp1 = (char *) _stringVal(self);
l1 = _stringSize(self);
if (myClass != _String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
cp1 += n;
l1 -= n;
}
cp2 = (char *) _stringVal(s);
l2 = _stringSize(s);
if (argClass != _String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(s))->c_ninstvars));
cp2 += n;
l2 -= n;
}
sz = OHDR_SIZE + l1 + l2 + 1;
_qNew(newString, sz, __context);
if (newString != nil) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
/*
* refetch in case of a GC
*/
cp1 = (char *) _stringVal(self);
cp2 = (char *) _stringVal(s);
#ifdef FAST_MEMCPY
bcopy(cp1, dstp, l1);
bcopy(cp2, dstp + l1, l2+1);
#else
# ifdef FAST_STRCPY
strcpy(dstp, cp1);
strcpy(dstp + l1, cp2);
# else
while ((*dstp++ = *cp1++) != '\0') ;
dstp--;
while ((*dstp++ = *cp2++) != '\0') ;
# endif
#endif
RETURN ( newString );
}
}
}
%}
.
^ super , aString
!
concatenate:string1 and:string2
"return the concatenation of myself and the arguments, string1 and string2.
This is equivalent to self , string1 , string2
- generated by compiler when such a construct is detected"
|newString|
%{
int len1, len2, len3, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
REGISTER unsigned char *srcp;
#endif
REGISTER unsigned char *dstp;
OBJ new();
if ((__isString(self) || __isSymbol(self))
&& (__isString(string1) || __isSymbol(string1))
&& (__isString(string2) || __isSymbol(string2))) {
len1 = _stringSize(self);
len2 = _stringSize(string1);
len3 = _stringSize(string2);
sz = OHDR_SIZE + len1 + len2 + len3 + 1;
_qNew(newString, sz, __context);
if (newString != nil) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
bcopy(_stringVal(self), dstp, len1);
bcopy(_stringVal(string1), dstp + len1, len2);
bcopy(_stringVal(string2), dstp + len1 + len2, len3+1);
#else
# ifdef FAST_STRCPY
strcpy(dstp, _stringVal(self));
strcpy(dstp + len1, _stringVal(string1));
strcpy(dstp + len1 + len2, _stringVal(string2));
# else
srcp = _stringVal(self);
while ((*dstp++ = *srcp++) != '\0') ;
dstp--;
srcp = _stringVal(string1);
while ((*dstp++ = *srcp++) != '\0') ;
dstp--;
srcp = _stringVal(string2);
while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
RETURN ( newString );
}
}
%}
.
^ super , string1 , string2
!
concatenate:string1 and:string2 and:string3
"return the concatenation of myself and the string arguments.
This is equivalent to self , string1 , string2 , string3
- generated by compiler when such a construct is detected"
|newString|
%{
int len1, len2, len3, len4, sz;
#if !defined(FAST_MEMCPY) && !defined(FAST_STRCPY)
REGISTER unsigned char *srcp;
#endif
REGISTER unsigned char *dstp;
OBJ new();
if ((__isString(self) || __isSymbol(self))
&& (__isString(string1) || __isSymbol(string1))
&& (__isString(string2) || __isSymbol(string2))
&& (__isString(string3) || __isSymbol(string3))) {
len1 = _stringSize(self);
len2 = _stringSize(string1);
len3 = _stringSize(string2);
len4 = _stringSize(string3);
sz = OHDR_SIZE + len1 + len2 + len3 + len4 + 1;
_qNew(newString, sz, __context);
if (newString != nil) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
bcopy(_stringVal(self), dstp, len1);
bcopy(_stringVal(string1), dstp + len1, len2);
bcopy(_stringVal(string2), dstp + len1 + len2, len3);
bcopy(_stringVal(string3), dstp + len1 + len2 + len3, len4+1);
#else
# ifdef FAST_STRCPY
strcpy(dstp, _stringVal(self));
strcpy(dstp + len1, _stringVal(string1));
strcpy(dstp + len1 + len2, _stringVal(string2));
strcpy(dstp + len1 + len2 + len3, _stringVal(string3));
# else
srcp = _stringVal(self);
while ((*dstp++ = *srcp++) != '\0') ;
dstp--;
srcp = _stringVal(string1);
while ((*dstp++ = *srcp++) != '\0') ;
dstp--;
srcp = _stringVal(string2);
while ((*dstp++ = *srcp++) != '\0') ;
dstp--;
srcp = _stringVal(string3);
while ((*dstp++ = *srcp++) != '\0') ;
# endif
#endif
RETURN ( newString );
}
}
%}
.
^ super , string1 , string2 , string3
!
copyWith:aCharacter
"return the concatenation of myself and the argument, aCharacter
- reimplemented here for speed"
%{ /* NOCONTEXT */
int sz;
REGISTER unsigned char *dstp;
int offs;
OBJ cls, newString;
if (__isCharacter(aCharacter)) {
sz = _qSize(self) + 1;
if ((cls = _qClass(self)) != String) {
offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(cls)->c_ninstvars));
sz -= offs;
} else
offs = 0;
PROTECT_CONTEXT
_qNew(newString, sz, SENDER);
UNPROTECT_CONTEXT
if (newString) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
sz = sz - OHDR_SIZE - 1 - 1;
bcopy(_stringVal(self) + offs, dstp, sz);
dstp += sz;
#else
# ifdef FAST_STRCPY
strcpy(dstp, _stringVal(self) + offs);
dstp += sz - OHDR_SIZE - 1 - 1;
# else
{
REGISTER unsigned char *srcp;
srcp = _stringVal(self) + offs;
while ((*dstp = *srcp++) != '\0')
dstp++;
}
# endif
#endif
*dstp++ = _intVal(_characterVal(aCharacter));
*dstp = '\0';
RETURN (newString );
}
}
%}.
"fall back in case of non-character arg;
will eventually lead to an bad element signal raise"
^ super copyWith:aCharacter
!
copyFrom:start to:stop
"return the substring starting at index start, anInteger and ending
at stop, anInteger. This method will always return a string, even
if the receiver is a subclass-instance. This might change if there is a need.
- reimplemented here for speed"
%{ /* NOCONTEXT */
#if !defined(FAST_MEMCPY)
REGISTER unsigned char *srcp;
#endif
REGISTER unsigned char *dstp;
REGISTER int count;
int len, sz, index1, index2;
OBJ newString;
if (_isSmallInteger(start) && _isSmallInteger(stop)) {
len = _stringSize(self);
index1 = _intVal(start);
index2 = _intVal(stop);
if ((index1 <= index2) && (index1 > 0)) {
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
index1 += n;
index2 += n;
}
if (index2 <= len) {
count = index2 - index1 + 1;
PROTECT_CONTEXT
sz = OHDR_SIZE + count + 1;
_qNew(newString, sz, SENDER);
UNPROTECT_CONTEXT
if (newString != nil) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
bcopy(_stringVal(self) + index1 - 1, dstp, count);
dstp[count] = '\0';
#else
srcp = _stringVal(self) + index1 - 1;
while (count--) {
*dstp++ = *srcp++;
}
*dstp = '\0';
#endif
RETURN ( newString );
}
}
}
}
%}.
"fall back in case of non-integer index or out-of-bound index;
will eventually lead to an out-of-bound signal raise"
^ super copyFrom:start to:stop
!
copyFrom:start
"return the substring from start, anInteger to the end.
This method will always return a string, even if the receiver
is a subclass-instance. This might change if there is a need.
- reimplemented here for speed"
%{ /* NOCONTEXT */
#if !defined(FAST_MEMCPY)
REGISTER unsigned char *srcp;
#endif
REGISTER unsigned char *dstp;
REGISTER int count;
int len, index1, sz;
OBJ newString;
if (_isSmallInteger(start)) {
len = _stringSize(self);
index1 = _intVal(start);
if (index1 > 0) {
if (_qClass(self) != String) {
int n = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
index1 += n;
}
if (index1 <= len) {
count = len - index1 + 1;
PROTECT_CONTEXT
sz = OHDR_SIZE + count + 1;
_qNew(newString, sz, SENDER);
UNPROTECT_CONTEXT
if (newString != nil) {
_InstPtr(newString)->o_class = String;
dstp = _stringVal(newString);
#ifdef FAST_MEMCPY
bcopy(_stringVal(self) + index1 - 1, dstp, count);
dstp[count] = '\0';
#else
srcp = _stringVal(self) + index1 - 1;
while (count--) {
*dstp++ = *srcp++;
}
*dstp = '\0';
#endif
RETURN ( newString );
}
}
}
}
%}.
"fall back in case of non-integer index or out-of-bound index;
will eventually lead to an out-of-bound signal raise"
^ super copyFrom:start
! !
!String methodsFor:'filling and replacing'!
replaceFrom:start to:stop with:aString startingAt:repStart
"replace the characters starting at index start, anInteger and ending
at stop, anInteger with characters from aString starting at repStart.
- reimplemented here for speed"
%{ /* NOCONTEXT */
REGISTER unsigned char *srcp, *dstp;
REGISTER int count;
int len, index1, index2;
int repLen, repIndex;
if ((__isString(aString) || __isSymbol(aString))
&& __isString(self)
&& _isSmallInteger(start)
&& _isSmallInteger(stop)) {
len = _stringSize(self);
index1 = _intVal(start);
index2 = _intVal(stop);
count = index2 - index1 + 1;
if (count <= 0) {
RETURN (self);
}
if ((index2 <= len) && (index1 > 0)) {
repLen = _stringSize(aString);
repIndex = _intVal(repStart);
if ((repIndex > 0) && ((repIndex + count - 1) <= repLen)) {
srcp = _stringVal(aString) + repIndex - 1;
dstp = _stringVal(self) + index1 - 1;
if (aString == self) {
/* take care of overlapping copy */
if (srcp < dstp) {
/* must do a reverse copy */
srcp += count;
dstp += count;
while (count-- > 0) {
*--dstp = *--srcp;
}
RETURN (self);
}
}
#ifdef FAST_MEMCPY
bcopy(srcp, dstp, count);
#else
while (count-- > 0) {
*dstp++ = *srcp++;
}
#endif
RETURN (self);
}
}
}
%}
.
^ super replaceFrom:start to:stop with:aString startingAt:repStart
!
replaceAll:oldCharacter by:newCharacter
"replace all oldCharacters by newCharacter in the receiver"
%{ /* NOCONTEXT */
REGISTER unsigned char *srcp;
REGISTER unsigned oldVal, newVal;
if (__isCharacter(oldCharacter)
&& __isCharacter(newCharacter)
&& __isString(self)) {
srcp = (unsigned char *)_stringVal(self);
oldVal = _intVal(_characterVal(oldCharacter));
newVal = _intVal(_characterVal(newCharacter));
while (*srcp) {
if (*srcp == oldVal)
*srcp = newVal;
srcp++;
}
RETURN ( self );
}
%}
.
^ super replaceAll:oldCharacter by:newCharacter
!
reverse
"in-place reverse the characters of the string."
"Q: is there a need to redefine it here ?"
%{ /* NOCONTEXT */
REGISTER char c;
REGISTER unsigned char *hip, *lowp;
if (__isString(self)) {
lowp = _stringVal(self);
hip = lowp + _stringSize(self) - 1;
while (lowp < hip) {
c = *lowp;
*lowp = *hip;
*hip = c;
lowp++;
hip--;
}
RETURN ( self );
}
%}
.
^ super reverse
!
withCRs
"return a copy of the receiver, where
all \-characters are replaced by newline characters
- reimplemented here for speed"
|newString|
%{
OBJ new();
REGISTER char c;
REGISTER unsigned char *srcp, *dstp;
int len, offs;
len = _qSize(self);
if (_qClass(self) != String) {
offs = __OBJS2BYTES__(_intVal(_ClassInstPtr(_qClass(self))->c_ninstvars));
len -= offs;
} else
offs = 0;
_qNew(newString, len, __context);
if (newString != nil) {
_InstPtr(newString)->o_class = String;
srcp = _stringVal(self) + offs;
dstp = _stringVal(newString);
while (c = *srcp++)
if (c == '\\')
*dstp++ = '\n';
else
*dstp++ = c;
*dstp++ = '\0';
RETURN ( newString );
}
%}
.
^ super withCRs
!
atAllPut:aCharacter
"replace all characters with aCharacter
- reimplemented here for speed"
%{ /* NOCONTEXT */
REGISTER int byteValue;
#ifndef FAST_MEMSET
REGISTER unsigned char *dst;
REGISTER int l;
#endif
if (__isCharacter(aCharacter) && __isString(self)) {
byteValue = _intVal(_characterVal(aCharacter));
#ifdef FAST_MEMSET
memset(_stringVal(self), byteValue, _qSize(self) - OHDR_SIZE - 1);
#else
dst = _stringVal(self);
l = _qSize(self) - OHDR_SIZE - 1;
while (l >= 8) {
dst[0] = dst[1] = dst[2] = dst[3] = byteValue;
dst[4] = dst[5] = dst[6] = dst[7] = byteValue;
dst += 8;
l -= 8;
}
while (l--)
*dst++ = byteValue;
#endif
RETURN ( self );
}
%}
.
^ super atAllPut:aCharacter
"
(String new:10) atAllPut:$*
String new:10 withAll:$*
"
!
withoutSpaces
"return a copy of myself without leading and trailing spaces.
Notice, this is different from String>>withoutSeparators."
|startIndex "{ Class: SmallInteger }"
endIndex "{ Class: SmallInteger }"
sz blank|
startIndex := 0.
%{
REGISTER unsigned char *cp0;
REGISTER unsigned char *cp;
/* ignore instances of subclasses ... */
if (_qClass(self) == String) {
cp = cp0 = _stringVal(self);
while (*cp == ' ') cp++;
startIndex = _MKSMALLINT(cp - cp0 + 1);
cp = cp + strlen(cp) - 1;
while ((cp >= cp0) && (*cp == ' ')) cp--;
endIndex = _MKSMALLINT(cp - cp0 + 1);
}
%}
.
sz := self size.
startIndex == 0 ifTrue:[
startIndex := 1.
endIndex := sz.
blank := Character space.
[(startIndex < endIndex) and:[(self at:startIndex) == blank]] whileTrue:[
startIndex := startIndex + 1
].
[(endIndex > 1) and:[(self at:endIndex) == blank]] whileTrue:[
endIndex := endIndex - 1
]
].
startIndex > endIndex ifTrue:[
^ ''
].
((startIndex == 1) and:[endIndex == sz]) ifTrue:[
^ self
].
^ self copyFrom:startIndex to:endIndex
!
withoutSeparators
"return a copy of myself without leading and trailing whitespace.
Notice, this is different from String>>withoutSpaces."
|startIndex "{ Class: SmallInteger }"
endIndex "{ Class: SmallInteger }"
sz|
startIndex := 0.
%{
REGISTER unsigned char *cp0;
REGISTER unsigned char *cp;
REGISTER unsigned char c;
/* ignore instances of subclasses ... */
if (_qClass(self) == String) {
cp = cp0 = _stringVal(self);
c = *cp;
while ((c == ' ') || (c == '\n') || (c == '\t')
|| (c == '\r') || (c == '\f')) {
cp++;
c = *cp;
}
startIndex = _MKSMALLINT(cp - cp0 + 1);
cp = cp + strlen(cp) - 1;
while ((cp >= cp0) && (*cp == ' ')) cp--;
c = *cp;
while ((cp >= cp0) &&
((c == ' ') || (c == '\n') || (c == '\t')
|| (c == '\r') || (c == '\f'))) {
cp--;
c = *cp;
}
endIndex = _MKSMALLINT(cp - cp0 + 1);
}
%}
.
sz := self size.
startIndex == 0 ifTrue:[
startIndex := 1.
endIndex := self size.
[(startIndex < endIndex) and:[(self at:startIndex) isSeparator]] whileTrue:[
startIndex := startIndex + 1
].
[(endIndex > 1) and:[(self at:endIndex) isSeparator]] whileTrue:[
endIndex := endIndex - 1
].
].
startIndex > endIndex ifTrue:[
^ ''
].
((startIndex == 1) and:[endIndex == sz]) ifTrue:[
^ self
].
^ self copyFrom:startIndex to:endIndex
! !
!String methodsFor:'queries'!
encoding
"assume iso8859 encoding"
^ #iso8859
!
knownAsSymbol
"return true, if there is a symbol with same characters in the
system - use to check for existance of a symbol without creating one"
%{ /* NOCONTEXT */
extern OBJ _KNOWNASSYMBOL();
RETURN ( _KNOWNASSYMBOL(_stringVal(self)) );
%}
!
isEmpty
"return true if the receiver is empty (i.e. if size == 0)
Redefined here for performance"
%{ /* NOCONTEXT */
OBJ cls;
cls = _qClass(self);
if ((cls == String) || (cls == Symbol)) {
RETURN ( (_stringSize(self) == 0) ? true : false);
}
%}
.
^ super isEmpty
! !