--- a/HTMLUtilities.st Fri Feb 03 11:48:07 2017 +0100
+++ b/HTMLUtilities.st Sun Feb 05 17:59:56 2017 +0100
@@ -127,27 +127,10 @@
"/ and were developed independent of each other, but later moved to this common place.
- |rs ws c controlString|
-
- rs := ReadStream on: aString.
- ws := WriteStream on: ''.
- [ rs atEnd ] whileFalse: [
- c := rs next.
- controlString := controlCharacters notEmptyOrNil ifTrue:[controlCharacters at:c ifAbsent:nil] ifFalse:[nil].
- controlString notNil ifTrue:[
- ws nextPutAll:controlString.
- ] ifFalse:[
- c codePoint > 16r7F ifTrue:[
- ws
- nextPutAll:'&#';
- nextPutAll:(c codePoint printString);
- nextPutAll:';'.
- ] ifFalse:[
- ws nextPut:c.
- ]
+ ^ String
+ streamContents:[:ws |
+ self escapeCharacterEntities:aString andControlCharacters:controlCharacters on:ws.
]
- ].
- ^ ws contents
"
self escapeCharacterEntities:'a<b'
@@ -155,6 +138,47 @@
"
"Created: / 06-05-2015 / 16:29:51 / sr"
+ "Modified (format): / 05-02-2017 / 17:59:32 / cg"
+!
+
+escapeCharacterEntities:aString andControlCharacters:controlCharacters on:aWriteStream
+ "helper to escape characters of aString for use in html; the result is written to aWriteStream.
+ For each character of aString:
+ an entry in controlCharacters (keyed by character; may be nil or empty) -> that replacement string
+ else codePoint above 16r7F -> '&#', codePoint printString, ';' - anything else is copied unchanged
+ "
+ "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
+ "/ we should refactor this into one method only (can we do hex escapes always ?).
+ "/ Notice that these two methods came into existence for historic reasons
+ "/ and were developed independently of each other, but were later moved to this common place.
+
+ "/ rs reads aString; c is the current character; controlString is its replacement from controlCharacters, or nil
+ |rs c controlString|
+
+ rs := ReadStream on: aString.
+ [ rs atEnd ] whileFalse: [
+ c := rs next.
+ controlString := controlCharacters notEmptyOrNil ifTrue:[controlCharacters at:c ifAbsent:nil] ifFalse:[nil].
+ controlString notNil ifTrue:[
+ aWriteStream nextPutAll:controlString.
+ ] ifFalse:[
+ c codePoint > 16r7F ifTrue:[
+ aWriteStream
+ nextPutAll:'&#';
+ nextPutAll:(c codePoint printString);
+ nextPutAll:';'.
+ ] ifFalse:[
+ aWriteStream nextPut:c.
+ ]
+ ]
+ ].
+
+ "
+ String streamContents:[:s | self escapeCharacterEntities:'a<b' andControlCharacters:nil on:s]
+ String streamContents:[:s | self escapeCharacterEntities:'aöb' andControlCharacters:nil on:s]
+ "
+
+ "Created: / 05-02-2017 / 17:58:34 / cg"
!
extractCharSetEncodingFromContentType:contentTypeLine