CharacterSet.st
changeset 3459 c479aa5988ea
parent 3458 7f77f7f5dbd9
child 3460 d965c11358b7
--- a/CharacterSet.st	Wed Dec 24 12:36:29 2014 +0100
+++ b/CharacterSet.st	Wed Dec 24 13:49:58 2014 +0100
@@ -46,10 +46,41 @@
 
 !CharacterSet class methodsFor:'instance creation'!
 
+allSingleByteCharacters
+    "return a new character set, which contains all single-byte characters
+     (i.e. the characters with code points 0 to 255)"
+
+    ^ self new addAllSingleByteCharacters
+!
+
+empty
+    "return a new, empty character set.
+     This is the same as sending new to the class."
+
+    ^ self new
+!
+
 new
+    "return a new, empty character set.
+     The instance is created with basicNew and then sent initialize."
+
     ^ self basicNew initialize
 
     "Created: / 28-01-2011 / 17:29:24 / cg"
+!
+
+nonSeparators
+    "return a new character set, which contains all single-byte characters
+     except those answered by Character separators (i.e. whitespace)"
+
+    ^ self new 
+        addAllSingleByteCharacters;
+        removeAll:(Character separators); 
+        yourself
+!
+
+separators
+    "return a new character set, which contains all whitespace characters,
+     i.e. the elements answered by Character separators"
+
+    ^ self new 
+        addAll:(Character separators); 
+        yourself
 ! !
 
 !CharacterSet methodsFor:'accessing'!
@@ -79,20 +110,6 @@
     "Created: / 28-01-2011 / 17:44:21 / cg"
 !
 
-includes:aCharacter
-    "Return true if the set contains aCharacter"
-
-    |cp byteIndex bitIndex|
-
-    cp := aCharacter codePoint.
-    (cp > 255) ifTrue:[^ false].
-
-    byteIndex := (cp // 8) + 1.
-    bitIndex := (cp \\ 8) + 1.
-    byteIndex > bits size ifTrue:[^ false].
-    ^ ((bits at:byteIndex) bitAt:bitIndex) ~~ 0
-!
-
 remove:aCharacter ifAbsent:exceptionValue
     |cp byteIndex bitIndex mask|
 
@@ -125,15 +142,35 @@
 !CharacterSet methodsFor:'copying'!
 
 postCopy
+    "make sure that the bitmap is not shared with the copy:
+     replace bits by a copy of itself, so that later changes to one
+     of the two sets do not show up in the other"
+
     bits := bits copy
 ! !
 
 !CharacterSet methodsFor:'initialization'!
 
+addAllSingleByteCharacters
+    "make the receiver contain all single-byte characters (code points 0 to 255).
+     Any previous bitmap is discarded and replaced by a fresh one of 256 / 8 bytes
+     with all bits set."
+
+    bits := ByteArray new:(256 / 8) withAll:16rFF.
+
+    "
+     self assert:(
+        CharacterSet new addAllSingleByteCharacters
+            includesAll:((Character value:0) to:(Character value:255)))
+
+     self assert:(
+        CharacterSet allSingleByteCharacters
+            includesAll:((Character value:0) to:(Character value:255)))
+    "
+!
+
 initialize
+    "reset the receiver to the empty state; a nil bitmap stands for the empty set"
+
     bits := nil "/ empty
 
     "Created: / 28-01-2011 / 17:29:48 / cg"
+!
+
+setByteArrayMap:aByteArray
+    "install aByteArray as the receiver's bitmap.
+     The argument is stored as is (it is not copied here).
+     As read by includes:, bit (cp \\ 8) + 1 of byte (cp // 8) + 1
+     represents the character with code point cp."
+
+    bits := aByteArray
 ! !
 
 !CharacterSet methodsFor:'queries'!
@@ -160,6 +197,20 @@
     "Created: / 28-01-2011 / 17:39:16 / cg"
 !
 
+includes:aCharacter
+    "Return true if the set contains aCharacter.
+     Only code points 0 to 255 are looked up in the bitmap;
+     for any character above that range, false is returned."
+
+    |cp byteIndex bitIndex|
+
+    cp := aCharacter codePoint.
+    (cp > 255) ifTrue:[^ false].
+
+    "each bitmap byte covers 8 consecutive code points; both indices are 1-based"
+    byteIndex := (cp // 8) + 1.
+    bitIndex := (cp \\ 8) + 1.
+    "a bitmap which is too short to cover this byte does not contain the character
+     (presumably this also handles the nil bitmap of an empty set,
+      assuming that nil size answers 0 - to be confirmed)"
+    byteIndex > bits size ifTrue:[^ false].
+    ^ ((bits at:byteIndex) bitAt:bitIndex) ~~ 0
+!
+
 size
     |n|
 
@@ -174,9 +225,24 @@
     "Created: / 28-01-2011 / 17:35:21 / cg"
 ! !
 
+!CharacterSet methodsFor:'set operations'!
+
+complement
+    "return a character set containing all characters (from codepoint 0 to 255),
+     which are NOT included in the receiver.
+     The result is a new set, built by starting with the full single-byte set
+     and removing the receiver's elements from it."
+
+    ^ self class allSingleByteCharacters
+        removeAll:self;
+        yourself
+! !
+
 !CharacterSet class methodsFor:'documentation'!
 
+version
+    "return the expanded CVS Header keyword string recorded for this file"
+
+    ^ '$Header: /cvs/stx/stx/libbasic2/CharacterSet.st,v 1.4 2014-12-24 12:49:58 cg Exp $'
+!
+
 version_CVS
+    "return the expanded CVS Header keyword string recorded for this file"
+
-    ^ '$Header: /cvs/stx/stx/libbasic2/CharacterSet.st,v 1.3 2014-12-24 11:36:29 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic2/CharacterSet.st,v 1.4 2014-12-24 12:49:58 cg Exp $'
 ! !