CharacterArray.st
branchjv
changeset 17921 4069fe8e9039
parent 17911 a99f15c5efa5
child 17922 701184329b01
--- a/CharacterArray.st	Sat Feb 11 22:04:14 2012 +0000
+++ b/CharacterArray.st	Mon Feb 13 19:19:41 2012 +0000
@@ -282,7 +282,6 @@
     "Created: 3.8.1997 / 18:16:40 / cg"
 ! !
 
-
 !CharacterArray class methodsFor:'cleanup'!
 
 lowSpaceCleanup
@@ -1281,6 +1280,19 @@
 					 with:str7)
 !
 
+bindWith:str1 with:str2 with:str3 with:str4 with:str5 with:str6 with:str7 with:str8
+    "return a copy of the receiver, where a '%1' .. '%8' escapes
+     are replaced by str1 .. str8 respectively.
+     This has been added for VisualAge compatibility."
+
+    ^ self expandPlaceholdersWith:(Array with:str1 with:str2
+                                         with:str3 with:str4
+                                         with:str5 with:str6
+                                         with:str7 with:str8)
+
+    "Created: / 06-02-2012 / 10:33:18 / cg"
+!
+
 bindWithArguments:anArrayOfStrings
     "return a copy of the receiver, where a '%i' escape
      is replaced by the coresponding string from the argument array.
@@ -3128,38 +3140,6 @@
     "Modified: 27.4.1996 / 13:29:30 / cg"
 !
 
-tokensBasedOn:aCharacter
-    "this is an ST-80 alias for the ST/X method
-	asCollectionOfSubstringsSeparatedBy:"
-
-    ^ self asCollectionOfSubstringsSeparatedBy:aCharacter
-
-    "
-     'hello:world:isnt:this nice' tokensBasedOn:$:
-     'foo,bar,baz' tokensBasedOn:$,
-     '/etc/passwd' asFilename readStream nextLine tokensBasedOn:$:
-    "
-!
-
-withoutAllSpaces
-    ^ self asCollectionOfWords asStringWith:''.
-"/    |col string|
-"/
-"/    col := self asCollectionOfWords.
-"/    string := String new.
-"/    col do:[:el |
-"/       string := string,el
-"/    ].
-"/    ^string
-
-    "
-     'hello wwww'  withoutAllSpaces
-     'hel   lo www   w'  withoutAllSpaces
-    "
-
-    "Modified: / 18.7.1998 / 22:53:08 / cg"
-!
-
 writeStream
     "return a stream for writing onto the receiver.
      Redefined to return a CharacterWriteStream which automatically checks for the bytesPerCharacter
@@ -4954,6 +4934,85 @@
     "
 !
 
+tokensBasedOn:aCharacter
+    "this is an ST-80 alias for the ST/X method
+	asCollectionOfSubstringsSeparatedBy:"
+
+    ^ self asCollectionOfSubstringsSeparatedBy:aCharacter
+
+    "
+     'hello:world:isnt:this nice' tokensBasedOn:$:
+     'foo,bar,baz' tokensBasedOn:$,
+     '/etc/passwd' asFilename readStream nextLine tokensBasedOn:$:
+    "
+!
+
+withCEscapes
+    "return a new string consisting of receivers characters
+     with all special and unprintable characters replaced by \X-character escapes.
+     (similar to the way C-language literal Strings are represented).
+     The resulting string will contain only 7-bit ascii characters.
+     Emphasis is not supported.
+     The following escapes are generated:
+        \r      return character
+        \n      newline character
+        \t      tab character
+        \\      the \ character itself
+        \xnn    two digit hex number defining the characters ascii value
+        \unnnn  four digit hex number defining the characters ascii value
+        \Unnnnnnnn  eight digit hex number defining the characters ascii value
+    "
+
+    |anyEscapeNeeded out seq|
+
+    "
+     first, check if any escape is needed and return the receiver unchanged if not
+    "
+    anyEscapeNeeded := self contains:[:ch | (ch codePoint between:32 and:126) not ].
+    anyEscapeNeeded ifFalse:[ ^ self ].
+
+    self hasChangeOfEmphasis ifTrue:[ self error:'emphasis not supported' ].
+
+    out := WriteStream on:(String new:self size-1).
+
+    self do:[:ch |
+        |cp|
+
+        (ch codePoint between:32 and:126) ifTrue:[
+            out nextPut:ch
+        ] ifFalse:[
+            ch == Character return ifTrue:[
+                seq := '\r'
+            ] ifFalse:[ ch == Character nl ifTrue:[
+                seq := '\n'
+            ] ifFalse:[ ch == Character tab ifTrue:[
+                seq := '\t'
+            ] ifFalse:[ ch == $\ ifTrue:[
+                seq := '\\'
+            ] ifFalse:[ 
+                cp := ch codePoint.
+                cp <= 16rFF ifTrue:[
+                    seq := '\x' , (cp printStringRadix:16 padTo:2)   
+                ] ifFalse:[
+                    cp <= 16rFFFF ifTrue:[
+                        seq := '\u' , (cp printStringRadix:16 padTo:4)   
+                    ] ifFalse:[
+                        seq := '\U',(cp printStringRadix:16 padTo:8)
+                    ]
+                ]
+            ]]]].
+            out nextPutAll:seq
+        ].
+    ].
+    ^ out contents
+
+    "
+     'hello\nworld\na\n\tnice\n\t\tstring' withoutCEscapes withCEscapes  
+    "
+
+    "Created: / 25-01-2012 / 11:08:16 / cg"
+!
+
 withCRs
     "return a new string consisting of receivers characters
      with all \-characters replaced by cr-characters."
@@ -4968,142 +5027,12 @@
 !
 
 withEscapes
-    "return a new string consisting of receivers characters
-     with all \X-character escapes replaced by corresponding-characters.
-     (similar to the way C-language Strings are converted).
-     The following escapes are supported:
-	\r      return character
-	\n      newline character
-	\b      backspace character
-	\f      formfeed character
-	\t      tab character
-	\e      escape character
-	\\      the \ character itself
-	\nnn    three digit octal number defining the characters ascii value
-	\other  other
-
-     Notice, that \' is NOT a valid escape, since the general syntax of
-     string constants is not affected by this method.
-
-     Although easily implementable, this is NOT done automatically
-     by the compiler (due to a lack of a language standard for this).
-     However, the compiler may detect sends ot #withEscapes to string literals
-     and place a modified string constant into the binary/byte-code.
-     Therefore, no runtime penalty will be payed for using these escapes.
-     (not in pre 2.11 versions)
-    "
-
-    |sz      "{ SmallInteger }"
-     newSize "{ SmallInteger }"
-     srcIdx  "{ SmallInteger }"
-     dstIdx  "{ SmallInteger }"
-     val     "{ SmallInteger }"
-     newString next hasEmphasis e|
-
-    "
-     first, count the number of escapes, to allow preallocation
-     of the new string ...
-     (it is faster to scan the string twice than to reallocate it multiple
-      times in a WriteStream)
-    "
-    sz := newSize := self size.
-    srcIdx := 1.
-    [(srcIdx := self indexOf:$\ startingAt:srcIdx) ~~ 0] whileTrue:[
-	srcIdx == sz ifFalse:[
-	    newSize := newSize - 1.
-	    srcIdx := srcIdx + 1.
-	    next := self at:srcIdx.
-	    next == $0 ifTrue:[
-		[srcIdx < sz and:[next isDigit]] whileTrue:[
-		    newSize := newSize - 1. srcIdx := srcIdx + 1. next := self at:srcIdx.
-		]
-	    ].
-	].
-	srcIdx := srcIdx + 1.
-    ].
-
-    newSize == sz ifTrue:[
-	^ self
-    ].
-
-    newString := self species new:newSize.
-
-    hasEmphasis := self hasChangeOfEmphasis.
-
-    "
-     copy over, replace escapes
-    "
-    srcIdx := dstIdx := 1.
-    [srcIdx <= sz] whileTrue:[
-	next := self at:srcIdx.
-	hasEmphasis ifTrue:[
-	    e := self emphasisAt:srcIdx
-	].
-	srcIdx := srcIdx + 1.
-	next == $\ ifTrue:[
-	    srcIdx <= sz ifTrue:[
-		next := self at:srcIdx.
-		srcIdx := srcIdx + 1.
-		next == $r ifTrue:[
-		    next := Character return
-		] ifFalse:[
-		    next == $n ifTrue:[
-			next := Character nl
-		    ] ifFalse:[
-			next == $b ifTrue:[
-			    next := Character backspace
-			] ifFalse:[
-			    next == $f ifTrue:[
-				next := Character newPage
-			    ] ifFalse:[
-				next == $t ifTrue:[
-				    next := Character tab
-				] ifFalse:[
-				    next == $e ifTrue:[
-					next := Character esc
-				    ] ifFalse:[
-					next == $0 ifTrue:[
-					    val := 0.
-					    [next notNil and:[next isDigit]] whileTrue:[
-						val := val * 8 + next digitValue.
-						srcIdx <= sz ifTrue:[
-						    next := self at:srcIdx.
-						    srcIdx := srcIdx + 1.
-						] ifFalse:[
-						    next := nil
-						]
-					    ].
-					    next := Character value:val.
-					]
-				    ]
-				]
-			    ]
-			]
-		    ]
-		].
-	    ].
-	].
-	newString at:dstIdx put:next.
-	hasEmphasis ifTrue:[
-	    newString emphasisAt:dstIdx put:e
-	].
-	dstIdx := dstIdx + 1.
-    ].
-    ^ newString
-
-    "
-     'hello world' withEscapes
-     'hello\world' withEscapes
-     'hello\world\' withEscapes
-     'hello world\' withEscapes
-     'hello\tworld' withEscapes
-     'hello\nworld\na\n\tnice\n\t\tstring' withEscapes
-     'hello\tworld\n' withEscapes
-     'hello\010world' withEscapes
-     'hello\r\nworld' withEscapes
-    "
-
-    "Modified: 12.5.1996 / 12:53:34 / cg"
+    "has been renamed; the name withEscapes is misleading"
+
+    self obsoleteMethodWarning:'use withoutCEscapes'.
+    ^ self withoutCEscapes.
+
+    "Modified: / 25-01-2012 / 10:42:30 / cg"
 !
 
 withMatchEscapes
@@ -5304,6 +5233,153 @@
     "Modified: 12.5.1996 / 13:05:10 / cg"
 !
 
+withoutAllSpaces
+    ^ self asCollectionOfWords asStringWith:''.
+"/    |col string|
+"/
+"/    col := self asCollectionOfWords.
+"/    string := String new.
+"/    col do:[:el |
+"/       string := string,el
+"/    ].
+"/    ^string
+
+    "
+     'hello wwww'  withoutAllSpaces
+     'hel   lo www   w'  withoutAllSpaces
+    "
+
+    "Modified: / 18.7.1998 / 22:53:08 / cg"
+!
+
+withoutCEscapes
+    "return a new string consisting of receivers characters
+     with all \X-character escapes replaced by corresponding-characters.
+     (similar to the way C-language Strings are converted).
+     The following escapes are supported:
+        \r      return character
+        \n      newline character
+        \b      backspace character
+        \f      formfeed character
+        \t      tab character
+        \e      escape character
+        \\      the \ character itself
+        \nnn    three digit octal number defining the characters ascii value
+        \xnn    two digit hex number defining the characters ascii value
+        \unnnn  four digit hex number defining the characters unicode value
+        \Unnnnnnnn  eight digit hex number defining the characters unicode value
+        \other  other
+
+     Notice, that \' is NOT a valid escape, since the general syntax of
+     string constants is not affected by this method.
+
+     Although easily implementable, this is NOT done automatically
+     by the compiler (due to a lack of a language standard for this).
+     However, the compiler may detect sends ot #withEscapes to string literals
+     and place a modified string constant into the binary/byte-code.
+     Therefore, no runtime penalty will be payed for using these escapes.
+     (not in pre 2.11 versions)
+    "
+
+    |val     "{ SmallInteger }"
+     in out nextChar nDigits|
+
+    "
+     first, see if there is any escape; if not, return the receiver unchanged
+    "
+    (self includes:$\) ifFalse:[^ self ].
+
+    self hasChangeOfEmphasis ifTrue:[ self error:'emphasis not supported' ].
+    out := CharacterWriteStream on:(String new:self size - 1).
+
+    in := ReadStream on:self.
+    [in atEnd] whileFalse:[
+        nextChar := in next.
+        nextChar == $\ ifTrue:[
+            in atEnd ifTrue:[
+            ] ifFalse:[
+                nextChar := in next.
+                nextChar == $r ifTrue:[
+                    nextChar := Character return
+                ] ifFalse:[ nextChar == $n ifTrue:[
+                    nextChar := Character nl
+                ] ifFalse:[ nextChar == $b ifTrue:[
+                    nextChar := Character backspace
+                ] ifFalse:[ nextChar == $f ifTrue:[
+                    nextChar := Character newPage
+                ] ifFalse:[ nextChar == $t ifTrue:[
+                    nextChar := Character tab
+                ] ifFalse:[ nextChar == $e ifTrue:[
+                    nextChar := Character esc
+                ] ifFalse:[
+                    nextChar == $0 ifTrue:[
+                        val := 0.
+                        nextChar := in peek.
+                        nDigits := 1.
+                        [nextChar notNil and:[nextChar isDigit and:[nDigits <= 3]]] whileTrue:[
+                            val := (val * 8) + nextChar digitValue.
+                            nextChar := in nextPeek.
+                            nDigits := nDigits + 1.
+                        ].
+                        nextChar := Character value:val.
+                    ] ifFalse:[
+                        val := 0.
+                        nextChar == $x ifTrue:[
+                            2 timesRepeat:[
+                                nextChar := in next.
+                                val := (val * 16) + nextChar digitValue.
+                            ].
+                            nextChar := Character value:val.
+                        ] ifFalse:[
+                            nextChar == $u ifTrue:[
+                                4 timesRepeat:[
+                                    nextChar := in next.
+                                    val := (val * 16) + nextChar digitValue.
+                                ].
+                                nextChar := Character value:val.
+                            ] ifFalse:[
+                                nextChar == $U ifTrue:[
+                                    8 timesRepeat:[
+                                        nextChar := in next.
+                                        val := (val * 16) + nextChar digitValue.
+                                    ].
+                                    nextChar := Character value:val.
+                                ]
+                            ]
+                        ]
+                    ]
+                ]]]]]].
+            ].
+        ].
+        out nextPut:nextChar.
+    ].
+    ^ out contents
+
+    "
+     'hello world' withoutCEscapes         
+     'hello\world' withoutCEscapes         
+     'hello\world\' withoutCEscapes        
+     'hello world\' withoutCEscapes        
+     'hello\tworld' withoutCEscapes        
+     'hello\nworld\na\n\tnice\n\t\tstring' withoutCEscapes
+     'hello\tworld\n' withoutCEscapes   
+     'hello\010world' withoutCEscapes   
+     'hello\r\nworld' withoutCEscapes   
+     'hello\r\n\x08world' withoutCEscapes 
+     '0\x080' withoutCEscapes       
+     '0\u12340' withoutCEscapes       
+     '0\U123456780' withoutCEscapes       
+     '0\0a' withoutCEscapes     
+     '0\00a' withoutCEscapes       
+     '0\000a' withoutCEscapes       
+     '0\0000a' withoutCEscapes       
+     '0\00000a' withoutCEscapes       
+     '0\03770' withoutCEscapes       
+    "
+
+    "Created: / 25-01-2012 / 10:41:44 / cg"
+!
+
 withoutCRs
     "return a new collection consisting of receivers elements
      with all cr-characters replaced by \-characters.
@@ -5938,15 +6014,15 @@
 !CharacterArray class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.464 2011/12/26 14:52:18 cg Exp $'
+    ^ '$Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.466 2012/02/06 09:38:25 cg Exp $'
 !
 
 version_CVS
-    ^ 'Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.464 2011/12/26 14:52:18 cg Exp '
+    ^ 'Header: /cvs/stx/stx/libbasic/CharacterArray.st,v 1.466 2012/02/06 09:38:25 cg Exp '
 !
 
 version_SVN
-    ^ '$Id: CharacterArray.st 10761 2012-01-19 11:46:00Z vranyj1 $'
+    ^ '$Id: CharacterArray.st 10777 2012-02-13 19:19:41Z vranyj1 $'
 ! !
 
 CharacterArray initialize!
@@ -5956,3 +6032,4 @@
 
 
 
+