--- a/CharacterArray.st Thu Dec 20 11:48:59 2012 +0000
+++ b/CharacterArray.st Sat Jan 19 01:30:00 2013 +0000
@@ -11,7 +11,7 @@
"
"{ Package: 'stx:libbasic' }"
-ByteArray subclass:#CharacterArray
+UninterpretedBytes variableByteSubclass:#CharacterArray
instanceVariableNames:''
classVariableNames:'PreviousMatch DecoderTables EncoderTables DecodingFailedSignal
EncodingFailedSignal LastString LastShiftTable'
@@ -326,6 +326,7 @@
"
! !
+
!CharacterArray class methodsFor:'pattern matching'!
matchEscapeCharacter
@@ -677,6 +678,7 @@
^ self == CharacterArray
! !
+
!CharacterArray methodsFor:'Compatibility-ANSI'!
addLineDelimiters
@@ -1012,12 +1014,6 @@
"
!
-isWideString
- ^ self bitsPerCharacter > 8
-
- "Created: / 05-09-2011 / 23:13:16 / cg"
-!
-
lastSpacePosition
"return the index of the last space character; 0 if there is none"
@@ -1559,6 +1555,7 @@
! !
+
!CharacterArray methodsFor:'character searching'!
includesMatchCharacters
@@ -1818,18 +1815,16 @@
therefore 'foo' = 'Foo' will return false.
Since this is incompatible to ST-80 (at least, V2.x) , this may change."
- |mySize "{ Class: SmallInteger }"
- otherSize |
+ |mySize "{ Class: SmallInteger }"|
(aString isString or:[aString species == self species]) ifFalse:[
- ^ false
+ ^ false
].
mySize := self size.
- otherSize := aString size.
- mySize == otherSize ifFalse:[^ false].
+ mySize ~~ (aString size) ifTrue:[^ false].
1 to:mySize do:[:index |
- (self at:index) = (aString at:index) ifFalse:[^ false].
+ (self at:index) = (aString at:index) ifFalse:[^ false].
].
^ true
@@ -1870,6 +1865,21 @@
"Modified: 22.4.1996 / 15:55:00 / cg"
!
+after:aString
+ "Compare the receiver with the argument and return true if the
+ receiver should come after the argument in a sorted list.
+ Otherwise return false.
+ NOTE: The comparison should be language specific, depending on the value of
+ LC_COLLATE, which is initialized from the environment.
+
+ Currently it is for Strings, but not for UnicodeStrings...
+
+ STUPID:
+ #after has a completely different meaning in SeqColl ..."
+
+ ^ (self compareCollatingWith:aString) > 0
+!
+
compareCaselessWith:aString
"Compare the receiver against the argument, ignoreing case.
Return 1 if the receiver is greater, 0 if equal and -1 if less than the argument.
@@ -1900,6 +1910,23 @@
"Modified: 22.4.1996 / 15:56:07 / cg"
!
+compareCollatingWith:aString
+ "Compare the receiver with the argument and return 1 if the receiver is
+ greater, 0 if equal and -1 if less than the argument in a sorted list.
+ The comparison is language specific, depending on the value of
+ LC_COLLATE, which is in the shell environment."
+
+ "TODO not yet defined for unicode"
+ <resource: #todo>
+
+ |s|
+
+ (s := self string) ~~ self ifTrue:[
+ ^ s compareCollatingWith:aString
+ ].
+ ^ self compareWith:aString
+!
+
compareWith:aString
"Compare the receiver with the argument and return 1 if the receiver is
greater, 0 if equal and -1 if less than the argument.
@@ -1971,8 +1998,18 @@
"/ immediately after any change, execute (maybe in a debugger):
"/ Set allSubInstancesDo:[:s | s rehash]
- ^ self hash_dragonBook
- "/ ^ self hash_sdbm.
+ "/ ^ self hash_dragonBook
+
+ |h|
+
+ "/
+ "/ this is the sdbm algorithm
+ "/
+ h := 0.
+ self do:[:char |
+ h := (65599 times:h) plus:char codePoint.
+ ].
+ ^ h
"
'a' hash
@@ -1985,6 +2022,44 @@
'ab' asArray hash
"
+ "
+ |syms ms|
+
+ syms := Symbol allInstances.
+ Transcript show:'syms: '; showCR:syms size.
+ Transcript show:'sdbm hashes: '; showCR:(syms collect:[:s| s hash]) asSet size.
+ Transcript show:'dragonBook hashes: '; showCR:(syms collect:[:s| s hash_dragonBook]) asSet size.
+
+ ms := Time millisecondsToRun:[
+ 10 timesRepeat:[
+ syms do:[:each| each hash].
+ ].
+ ].
+ Transcript show:'sdbm hash: '; showCR:ms.
+
+ ms := Time millisecondsToRun:[
+ 10 timesRepeat:[
+ syms do:[:each| each hash_dragonBook].
+ ].
+ ].
+ Transcript show:'dragonBook: '; showCR:ms.
+
+ syms := syms collect:[:each| each asUnicode16String].
+ ms := Time millisecondsToRun:[
+ 10 timesRepeat:[
+ syms do:[:each| each hash].
+ ].
+ ].
+ Transcript show:'unicode sdbm hash: '; showCR:ms.
+
+ ms := Time millisecondsToRun:[
+ 10 timesRepeat:[
+ syms do:[:each| each hash_dragonBook].
+ ].
+ ].
+ Transcript show:'unicode dragonBook:'; showCR:ms.
+ "
+
"Modified: / 26-12-2011 / 14:09:07 / cg"
!
@@ -2000,21 +2075,20 @@
self reverseDo:[:char |
"/ Sorry, stc cannot compile this (as of 10.9.2007)
"/ h := (h bitShift:4) + char asciiValue.
- h := (h bitShift:4).
- h:= h + char codePoint.
- h := h bitAnd:16rFFFFFFFF.
- g := (h bitAnd: 16rF0000000).
- g ~~ 0 ifTrue:[
- h := h bitXor:(g bitShift:-24).
- h := h bitXor:g.
- ].
+ h := (h bitShift:4).
+ h := h + char codePoint.
+ h := h bitAnd:16rFFFFFFFF.
+ g := (h bitAnd: 16rF0000000).
+ g ~~ 0 ifTrue:[
+ h := h bitXor:(g bitShift:-24).
+ h := h bitXor:g.
+ ].
].
"/
"/ multiply by large prime to spread values
"/ This speeds up Set and Dictionary by a factor of 10!!
"/
- h := h * 31415821.
- h := h bitAnd:16r3fffffff.
+ h := h times:31415821.
^ h
"
@@ -2031,30 +2105,6 @@
"Created: / 26-12-2011 / 13:46:06 / cg"
!
-hash_sdbm
- "return an integer useful as a hash-key"
-
- |h|
-
- "/
- "/ this is the sdbm algorithm
- "/
- h := 0.
- self do:[:char |
- h := (h * 65599) + char codePoint.
- h := h bitAnd:16r3FFFFFFF.
- ].
- ^ h
-
- "
- 'a' hash_sdbm
- 'aa' hash_sdbm
- 'ab' hash_sdbm
- "
-
- "Created: / 26-12-2011 / 13:48:13 / cg"
-!
-
levenshteinTo:aString
"return the levenshtein distance to the argument, aString;
this value corresponds to the number of replacements that have to be
@@ -2361,11 +2411,12 @@
!
asByteArray
- |bytes sz bytesPerCharacter idx|
-
- self string ~~ self ifTrue:[
- "/ for text and other wrappers
- ^ self string asByteArray
+ |bytes sz bytesPerCharacter idx str|
+
+ str := self string.
+ str ~~ self ifTrue:[
+ "/ for text and other wrappers
+ ^ str asByteArray
].
"/ for real strings, a fallback
@@ -2374,19 +2425,19 @@
bytes := ByteArray new:(sz * bytesPerCharacter).
idx := 1.
self do:[:char |
- |code|
-
- code := char codePoint.
- bytesPerCharacter == 2 ifTrue:[
- bytes unsignedShortAt:idx put:code
- ] ifFalse:[
- bytesPerCharacter == 4 ifTrue:[
- bytes unsignedLongAt:idx put:code
- ] ifFalse:[
- bytes at:idx put:code
- ].
- ].
- idx := idx + bytesPerCharacter.
+ |code|
+
+ code := char codePoint.
+ bytesPerCharacter == 2 ifTrue:[
+ bytes unsignedShortAt:idx put:code
+ ] ifFalse:[
+ bytesPerCharacter == 4 ifTrue:[
+ bytes unsignedLongAt:idx put:code
+ ] ifFalse:[
+ bytes at:idx put:code
+ ].
+ ].
+ idx := idx + bytesPerCharacter.
].
^ bytes
@@ -2758,11 +2809,12 @@
bitsPerCharacter := newStr bitsPerCharacter.
1 to:mySize do:[:i |
- c := (self at:i) asLowercase.
- c bitsPerCharacter > bitsPerCharacter ifTrue:[
- newStr := c stringSpecies fromString:newStr.
- ].
- newStr at:i put:c
+ c := (self at:i) asLowercase.
+ (c bitsPerCharacter > bitsPerCharacter
+ and:[c stringSpecies ~= newStr stringSpecies]) ifTrue:[
+ newStr := c stringSpecies fromString:newStr.
+ ].
+ newStr at:i put:c
].
^ newStr
@@ -2883,7 +2935,7 @@
"if possible, return the receiver converted to a 'normal' string.
It is only possible, if there are no characters with codePoints above 255 in the receiver."
- self bitsPerCharacter == 8 ifTrue:[^ self].
+ self isWideString ifFalse:[^ self].
(self contains:[:char | char codePoint > 255]) ifFalse:[^ self asSingleByteString].
^ self
@@ -3192,14 +3244,6 @@
^ self
"Modified: 27.4.1996 / 13:29:30 / cg"
-!
-
-writeStream
- "return a stream for writing onto the receiver.
- Redefined to return a CharacterWriteStream which automatically checks for the bytesPerCharacter
- and replaces the stream-buffer as required."
-
- ^ CharacterWriteStream on:self
! !
!CharacterArray methodsFor:'copying'!
@@ -3796,14 +3840,16 @@
utf8Decoded
"Interpreting myself as an UTF-8 representation, decode and return the decoded string."
- |in out is16Bit c|
-
- is16Bit := false.
+ |in out|
+
+ (self isWideString or:[self contains8BitCharacters]) ifFalse:[
+ "speed up common case"
+ ^ self.
+ ].
out := CharacterWriteStream on:(String uninitializedNew:self size).
in := self readStream.
[in atEnd] whileFalse:[
- c := Character utf8DecodeFrom:in.
- out nextPut:c.
+ out nextPut:(Character utf8DecodeFrom:in).
].
^ out contents
@@ -3821,12 +3867,12 @@
|utf8Encoding original readBack|
1 to:16rFFFF do:[:ascii |
- original := (Character value:ascii) asString.
- utf8Encoding := original utf8Encoded.
- readBack := utf8Encoding utf8Decoded.
- readBack = original ifFalse:[
- self halt
- ]
+ original := (Character value:ascii) asString.
+ utf8Encoding := original utf8Encoded.
+ readBack := utf8Encoding utf8Decoded.
+ readBack = original ifFalse:[
+ self halt
+ ]
]
"
!
@@ -3858,19 +3904,27 @@
utf8Encoded
"Return my UTF-8 representation as a new String"
- |s|
-
- s := WriteStream on:(String uninitializedNew:self size).
+ |s sz|
+
+ (self isWideString or:[self contains8BitCharacters]) ifFalse:[
+ "speed up common case"
+ ^ self.
+ ].
+ "We already know, that we need more space, just a rough estimation"
+ sz := self size.
+ s := WriteStream on:(String uninitializedNew:(sz+2+(sz//3))).
s nextPutAllUtf8:self.
^ s contents
"
- 'abcde' utf8Encoded
+ 'abcde1234' utf8Encoded
+ 'abcdeäöüß' utf8Encoded
"
"Modified: / 11-05-2010 / 19:12:37 / cg"
! !
+
!CharacterArray methodsFor:'matching - glob expressions'!
compoundMatch:aString
@@ -4314,6 +4368,8 @@
"Created: / 08-03-2012 / 03:11:11 / cg"
! !
+
+
!CharacterArray methodsFor:'padded copying'!
centerPaddedTo:newSize
@@ -4567,6 +4623,7 @@
"
! !
+
!CharacterArray methodsFor:'queries'!
bitsPerCharacter
@@ -4576,12 +4633,12 @@
|string max|
(string := self string) ~~ self ifTrue:[
- ^ string bitsPerCharacter
+ ^ string bitsPerCharacter
].
max := 8.
self do:[:eachCharacter |
- max := max max:(eachCharacter bitsPerCharacter)
+ max := max max:(eachCharacter bitsPerCharacter)
].
^ max
@@ -4837,6 +4894,7 @@
"Modified: 17.4.1997 / 12:50:23 / cg"
! !
+
!CharacterArray methodsFor:'special string converting'!
expandPlaceholdersWith:argArrayOrDictionary
@@ -5641,6 +5699,7 @@
"
! !
+
!CharacterArray methodsFor:'substring searching'!
findString:subString
@@ -6044,6 +6103,10 @@
"
!
+isWideString
+ ^ self bitsPerCharacter > 8
+!
+
numArgs
"treating the receiver as a message selector, return how many arguments would it take"
@@ -6076,7 +6139,11 @@
!
partsIfSelector
- "treat the receiver as a message selector, return a collection of parts."
+ "treat the receiver as a message selector, return a collection of parts.
+ Notice: this is more tolerant than Smalltalk's syntax would suggest;
+ especially it allows for empty keyword parts between colons.
+ This is not (and should not be checked here), to allow parsing of
+ degenerate selectors as appearing with objectiveC."
|idx1 "{ Class: SmallInteger }"
coll idx2 sz|
@@ -6085,19 +6152,21 @@
idx1 := 1.
sz := self size.
[true] whileTrue:[
- idx2 := self indexOf:$: startingAt:idx1 + 1.
- (idx2 == 0 or:[idx2 == sz]) ifTrue:[
- coll add:(self copyFrom:idx1).
- ^ coll
- ].
- coll add:(self copyFrom:idx1 to:idx2).
- idx1 := idx2 + 1
+ idx2 := self indexOf:$: startingAt:idx1.
+ (idx2 == 0 or:[idx2 == sz]) ifTrue:[
+ coll add:(self copyFrom:idx1).
+ ^ coll
+ ].
+ coll add:(self copyFrom:idx1 to:idx2).
+ idx1 := idx2 + 1
].
"
- 'foo:bar:' partsIfSelector
- #foo:bar: partsIfSelector
- 'hello' partsIfSelector
+ 'foo:' partsIfSelector
+ 'foo:bar:' partsIfSelector
+ 'foo::::' partsIfSelector
+ #foo:bar: partsIfSelector
+ 'hello' partsIfSelector
'+' partsIfSelector
"
! !
@@ -6122,15 +6191,12 @@
!CharacterArray class methodsFor:'documentation'!
version
- ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.481 2012/10/31 19:10:50 cg Exp $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.491 2013-01-16 12:31:17 stefan Exp $'
!
version_CVS
- ^ '§Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.481 2012/10/31 19:10:50 cg Exp §'
-!
-
-version_SVN
- ^ '$Id: CharacterArray.st 10876 2012-11-30 17:19:23Z vranyj1 $'
+ ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.491 2013-01-16 12:31:17 stefan Exp $'
! !
+
CharacterArray initialize!