--- a/HTMLUtilities.st Tue Nov 13 16:00:39 2018 +0100
+++ b/HTMLUtilities.st Wed Apr 24 12:02:49 2019 +0100
@@ -17,7 +17,8 @@
Object subclass:#HTMLUtilities
instanceVariableNames:''
- classVariableNames:'EscapeControlCharacters'
+ classVariableNames:'AmpersandEscapes EscapeControlCharacters HtmlEntityToCharacter
+ MathAmpersandEscapes'
poolDictionaries:''
category:'Net-Communication-Support'
!
@@ -63,21 +64,476 @@
"Modified: / 01-06-2010 / 11:25:12 / sr"
! !
+!HTMLUtilities class methodsFor:'constants'!
+
+ampersandEscapes
+ AmpersandEscapes isNil ifTrue:[
+ AmpersandEscapes := IdentityDictionary new.
+
+ #(
+ #nbsp 160 "/ non-breakable space - do something magic...
+
+ #emspace 160 "/ temporary
+ #enspace 160
+
+ #lt $<
+ #gt $>
+ #amp $&
+ #quot $"
+ #apos $'
+
+ #copy 169 "/ copyright
+ #reg 174 "/ registered
+
+ #cent 162
+ #pound 163
+ #yen 165
+ #brvbar $|
+ #sect 167
+ #laquo 171
+ #raquo 187
+ #plusmn 177
+ #micro 181
+ #middot 183
+ #frac14 188
+ #frac12 189
+ #frac34 190
+ #iquest 191
+ #iexcl 16rA1
+ #div 247
+ #divide 247
+ #not 16rAC
+ #shy 16rAD
+ #para 16rB6
+
+ #deg 176
+ #sup1 185
+ #sup2 178
+ #sup3 179
+
+ #ordm 16rBA
+ #ordf 16rAA
+ #macr 16rAF
+
+ #cedil 16rB8
+ #uml 16rA8
+ #acute 16rB4
+ #curren 16rA4
+
+ #Oslash 216
+ #oslash 248
+ #aring 229
+ #Aring 197
+
+ #ccedil 231
+ #Ccedil 199
+
+ #thorn 16rFE
+ #THORN 16rDE
+ #Thorn 15rDE
+
+ #eth 16rF0
+ #ETH 16rD0
+ #Eth 16rD0
+
+ #atilde 227
+ #Atilde 195
+ #ntilde 241
+ #Ntilde 209
+ #otilde 245
+ #Otilde 213
+
+ #auml 228
+ #Auml 196
+ #uuml 252
+ #Uuml 220
+ #ouml 246
+ #Ouml 214
+ #euml 235
+ #Euml 203
+ #iuml 239
+ #Iuml 207
+ #yuml 255
+
+ #acirc 226
+ #Acirc 194
+ #icirc 238
+ #Icirc 206
+ #ecirc 234
+ #Ecirc 202
+ #ucirc 251
+ #Ucirc 219
+ #ocirc 244
+ #Ocirc 212
+
+ #agrave 224
+ #Agrave 192
+ #egrave 232
+ #Egrave 200
+ #igrave 236
+ #Igrave 204
+ #ograve 242
+ #Ograve 210
+ #ugrave 249
+ #Ugrave 217
+
+ #aacute 225
+ #Aacute 193
+ #eacute 233
+ #Eacute 201
+ #iacute 237
+ #Iacute 205
+ #oacute 243
+ #Oacute 211
+ #uacute 250
+ #Uacute 218
+ #yacute 16rFD
+ #Yacute 16rDD
+
+ #szlig 223
+ #aelig 230
+ #AElig 198
+
+ "/ unicode
+
+ #OElig 16r0152 "/ 8859-2 (latin2)
+ #oelig 16r0153 "/ 8859-2 (latin2)
+
+ #ljlig 16r01C9 "/ 8859-2 (latin2)
+ #LJlig 16r01C7 "/ 8859-2 (latin2)
+ #Ljlig 16r01C8 "/ 8859-2 (latin2)
+
+ #Scaron 16r0160 "/ 8859-2 (latin2)
+ #scaron 16r0161 "/ 8859-2 (latin2)
+ #Yuml 16r0178 "/ 8859-2 (latin2)
+
+ #Alpha 16r0391 "/ greek alpha
+ #Beta 16r0392
+ #Gamma 16r0393
+ #Delta 16r0394
+ #Epsilon 16r0395
+ #Zeta 16r0396
+ #Eta 16r0397
+ #Theta 16r0398
+ #Iota 16r0399
+ #Kappa 16r039A
+ #Lambda 16r039B
+ #Mu 16r039C
+ #Nu 16r039D
+ #Xi 16r039E
+ #Omicron 16r039F
+ #Pi 16r03A0
+ #Rho 16r03A1
+ #Sigma 16r03A3
+ #Tau 16r03A4
+ #Upsilon 16r03A5
+ #Phi 16r03A6
+ #Chi 16r03A7
+ #Psi 16r03A8
+ #Omega 16r03A9
+
+ #alpha 16r03B1 "/ greek alpha
+ #beta 16r03B2
+ #gamma 16r03B3
+ #delta 16r03B4
+ #epsilon 16r03B5
+ #zeta 16r03B6
+ #eta 16r03B7
+ #theta 16r03B8
+ #iota 16r03B9
+ #kappa 16r03BA
+ #lambda 16r03BB
+ #mu 16r03BC
+ #nu 16r03BD
+ #xi 16r03BE
+ #omicron 16r03BF
+ #pi 16r03C0
+ #rho 16r03C1
+ #sigmaf 16r03C2
+ #sigma 16r03C3
+ #tau 16r03C4
+ #upsilon 16r03C5
+ #phi 16r03C6
+ #chi 16r03C7
+ #psi 16r03C8
+ #omega 16r03C9
+
+ #thetasym 16r03D1
+ #upsih 16r03D2
+ #piv 16r03D6
+
+ #ensp 16r2002
+ #emsp 16r2003
+
+ #thinsp 16r2009 "/ thin space
+ #zwnj 16r200C "/ zero width non-joiner
+ #zwj 16r200D "/ zero width joiner
+ #lrm 16r200E "/ left-to-right mark
+ #rlm 16r200F "/ right-to-left mark
+
+ #ndash 16r2013
+ #mdash 16r2014
+
+ #lsquo 16r2018 "/ left single quot. mark
+ #rsquo 16r2019 "/ right single quot. mark
+ #sbquo 16r201A "/ single low-9 quot. mark
+ #ldquo 16r201C "/ left double quot. mark
+ #rdquo 16r201D "/ right double quot. mark
+ #bdquo 16r201E "/ double low-9 quot. mark
+ #dagger 16r2020
+ #Dagger 16r2021 "/ double dagger
+
+ #bull 16r2022
+ #hellip 16r2026
+ #prime 16r2032
+ #Prime 16r2033
+ #oline 16r203E
+ #frasl 16r2044
+
+ #euro 16r20AC "/ 8859-16
+
+ #weierp 16r2118
+ #image 16r2111
+ #real 16r211C
+ #trade 16r2122
+ #angst 16r212B
+ #alefsym 16r2135
+ #larr 16r2190
+ #uarr 16r2191
+ #rarr 16r2192
+ #darr 16r2193
+ #harr 16r2194
+ #crarr 16r21B5
+ #lArr 16r21D0
+ #uArr 16r21D1
+ #rArr 16r21D2
+ #dArr 16r21D3
+ #hArr 16r21D4
+ #forall 16r2200
+ #part 16r2202
+ #exist 16r2203
+ #empty 16r2205
+ #nabla 16r2207
+ #isin 16r2208
+ #notin 16r2209
+ #ni 16r220B
+ #prod 16r220F
+ #sum 16r2211
+ #minus 16r2212
+ #lowast 16r2217
+ #radic 16r221A
+ #prop 16r221D
+ #infin 16r221E
+ #ang90 16r221F
+ #ang 16r2220
+ #angmsd 16r2221
+ #angsph 16r2222
+ #and 16r2227
+ #or 16r2228
+ #cap 16r2229
+ #cup 16r222A
+ #int 16r222B
+ #there4 16r2234
+ #sim 16r223C
+ #cong 16r2245
+ #asymp 16r2248
+ #ne 16r2260
+ #equiv 16r2261
+ #le 16r2264
+ #ge 16r2265
+ #sub 16r2282
+ #sup 16r2283
+ #nsub 16r2284
+ #sube 16r2286
+ #supe 16r2287
+ #oplus 16r2295
+ #otimes 16r2297
+ #perp 16r22A5
+ #sdot 16r22C5
+ #lceil 16r2308
+ #rceil 16r2309
+ #lfloor 16r230A
+ #rfloor 16r230B
+ #lang 16r2329
+ #rang 16r232A
+ #loz 16r25CA
+ #spades 16r2660
+ #clubs 16r2663
+ #hearts 16r2665
+ #diams 16r2666
+
+ ) pairWiseDo:[:key :val |
+ |v|
+
+ v := val.
+ val isInteger ifTrue:[
+ v := Character value:v
+ ].
+ AmpersandEscapes at:key put:v
+ ].
+ ].
+ ^ AmpersandEscapes
+
+ "Created: / 01-04-2019 / 14:34:25 / Claus Gittinger"
+!
+
+htmlEntityToCharacter
+ ^ self ampersandEscapes
+
+ "Modified: / 01-04-2019 / 14:36:41 / Claus Gittinger"
+!
+
+mathAmpersandEscapes
+ "these are obsolete now, as HTML4 added the missing stuff in the meantime."
+
+ MathAmpersandEscapes isNil ifTrue:[
+ MathAmpersandEscapes := IdentityDictionary new.
+
+ #(
+"/ #alpha 16r61 "/ greek alpha
+"/ #beta 16r62 "/ greek beta
+"/ #chi 16r63
+"/ #delta 16r64
+"/ #epsilon 16r65 "/ symbol characterSet has no epsilon
+ #vepsilon 16r65
+"/ #phi 16r66
+"/ #gamma 16r67
+"/ #eta 16r68
+"/ #iota 16r69
+ #varphi 16r6A
+"/ #kappa 16r6B
+"/ #lambda 16r6C
+"/ #mu 16r6D
+"/ #nu 16r6E
+"/ #omicron 16r6F
+"/ #pi 16r70
+"/ #theta 16r71
+ #vtheta 16r71 "/ symbol characterSet has no vtheta
+"/ #rho 16r72
+ #varrho 16r72 "/ symbol characterSet has no varrho
+"/ #sigma 16r73
+ #vsigma 16r56
+"/ #tau 16r74
+"/ #upsilon 16r75
+ #varpi 16r76
+"/ #omega 16r77
+"/ #xi 16r78
+"/ #psi 16r79
+"/ #zeta 16r7A
+
+
+
+"/ #Alpha 16r41 "/ greek alpha
+"/ #Beta 16r42 "/ greek beta
+"/ #Chi 16r43
+"/ #Delta 16r44
+"/ #Epsilon 16r45
+"/ #Phi 16r46
+"/ #Gamma 16r47
+"/ #Eta 16r48
+"/ #Iota 16r49
+"/
+"/ #Kappa 16r4B
+"/ #Lambda 16r4C
+"/ #Mu 16r4D
+"/ #Nu 16r4E
+"/ #Omicron 16r4F
+"/ #Pi 16r50
+"/ #Theta 16r51
+"/ #Rho 16r52
+"/ #Sigma 16r53
+"/ #Tau 16r54
+"/ #Upsilon 16rA1
+
+"/ #Omega 16r57
+"/ #Xi 16r58
+"/ #Psi 16r59
+"/ #Zeta 16r5A
+
+
+"/ #forall 16r22
+ #exist 16r24
+ #exists 16r24
+ #aleph 16rC0 "/ no, this is not alf ;-)
+ #Re 16rC2 "/ R fraktur
+ #Im 16rC1 "/ I fraktur
+ #infty 16rA5
+
+ #leq 16rA3 "/ less-equal
+ #geq 16rB3 "/ greater-equal
+ #equiv 16rBA "/ equivalent
+ #approx 16rBB
+ #cong 16r40
+"/ #neq 16rB9
+
+"/ #plusmn 16rB1
+ #times 16rB4
+"/ #div 16rB8
+ #oplus 16rC5
+ #otimes 16rC4
+ #oslash 16rC5
+
+ #sum 16rE5
+ #prod 16rD5
+
+ #uparrow 16rAD
+ #leftarrow 16rAC
+ #downarrow 16rAF
+ #rightarrow 16rAE
+ #leftrightarrow 16rAB
+ #Uparrow 16rDD
+ #Leftarrow 16rDC
+ #Downarrow 16rDF
+ #Rightarrow 16rDE
+ #Leftrightarrow 16rDB
+
+ #supset 16rC9
+ #supseteq 16rCA
+ #subset 16rCC
+ #subseteq 16rCD
+
+ #vee 16rDA
+ #wedge 16rD9
+ #neg 16rD8
+
+ #ldots 16rBC
+
+"/ #lfloor 16rEB
+"/ #rfloor 16rFB
+"/ #lceil 16rE9
+"/ #rceil 16rF9
+
+ ) pairWiseDo:[:key :val |
+ |v|
+
+ v := val.
+ val isInteger ifTrue:[
+ v := Character value:v
+ ].
+ MathAmpersandEscapes at:key put:v
+ ].
+ ].
+ ^ MathAmpersandEscapes
+
+ "Created: / 01-04-2019 / 14:40:51 / Claus Gittinger"
+! !
+
!HTMLUtilities class methodsFor:'helpers'!
characterFromHtmlEntityNamed:anHtmlEntityName
- anHtmlEntityName = 'lt' ifTrue:[^ $<].
- anHtmlEntityName = 'gt' ifTrue:[^ $>].
- anHtmlEntityName = 'amp' ifTrue:[^ $&].
- anHtmlEntityName = 'apos' ifTrue:[^ $'].
- anHtmlEntityName = 'quot' ifTrue:[^ $"].
+ ^ self ampersandEscapes
+ at:anHtmlEntityName asSymbol
+ ifAbsent:[
+ self halt.
+ "/ where to get the mapping???
+ "/ Answer: It is a mess. A good start may be
+ "/ https://www.w3.org/TR/html4/sgml/entities.html with 252 named entities.
+ "/ I guess an actual lookup table would be adequate.
+ $~
+ ]
- self halt. "/ where to get the mapping???
-
- ^ $~
-
- "Created: / 07-05-2015 / 15:23:40 / sr"
- "Modified: / 18-05-2015 / 12:15:36 / sr"
+ "Modified: / 01-04-2019 / 14:36:18 / Claus Gittinger"
+ "Modified: / 04-04-2019 / 10:46:22 / Maren"
!
controlCharacters
@@ -95,6 +551,25 @@
"Modified (comment): / 06-05-2015 / 16:17:31 / sr"
!
+copyReplaceCharactersWithHtmlEntitiesIn:aString
+ |stream htmlEntity|
+
+ stream := '' writeStream.
+ (aString ? '') do:[:eachCharacter |
+ htmlEntity := self htmlEntityForCharacter:eachCharacter.
+ htmlEntity isNil ifTrue:[
+ stream nextPut:eachCharacter.
+ ] ifFalse:[
+ stream
+ nextPut:$&;
+ nextPutAll:htmlEntity;
+ nextPut:$;.
+ ].
+ ].
+
+ ^ stream contents
+!
+
escapeCharacterEntities:aString
"helper to escape invalid/dangerous characters in html strings.
These are:
@@ -129,27 +604,10 @@
"/ and were developed independent of each other, but later moved to this common place.
- |rs ws c controlString|
-
- rs := ReadStream on: aString.
- ws := WriteStream on: ''.
- [ rs atEnd ] whileFalse: [
- c := rs next.
- controlString := controlCharacters notEmptyOrNil ifTrue:[controlCharacters at:c ifAbsent:nil] ifFalse:[nil].
- controlString notNil ifTrue:[
- ws nextPutAll:controlString.
- ] ifFalse:[
- c codePoint > 16r7F ifTrue:[
- ws
- nextPutAll:'&#';
- nextPutAll:(c codePoint printString);
- nextPutAll:';'.
- ] ifFalse:[
- ws nextPut:c.
- ]
+ ^ String
+ streamContents:[:ws |
+ self escapeCharacterEntities:aString andControlCharacters:controlCharacters on:ws.
]
- ].
- ^ ws contents
"
self escapeCharacterEntities:'a<b'
@@ -157,6 +615,69 @@
"
"Created: / 06-05-2015 / 16:29:51 / sr"
+ "Modified (format): / 05-02-2017 / 17:59:32 / cg"
+!
+
+escapeCharacterEntities:aString andControlCharacters:controlCharacters on:aWriteStream
+ "helper to escape invalid/dangerous characters in html strings.
+ These are:
+ control characters, '<', '>', '&' and space -> %XX ascii as hex digits
+ % -> %%
+ "
+ "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
+ "/ we should refactor this into one method only (can we do hex escapes always ?).
+ "/ Notice, that these two methods came into existance due to historic reasons
+ "/ and were developed independent of each other, but later moved to this common place.
+
+
+ |rs c controlString|
+
+ rs := ReadStream on: aString.
+ [ rs atEnd ] whileFalse: [
+ c := rs next.
+ controlString := controlCharacters notEmptyOrNil ifTrue:[controlCharacters at:c ifAbsent:nil] ifFalse:[nil].
+ controlString notNil ifTrue:[
+ aWriteStream nextPutAll:controlString.
+ ] ifFalse:[
+ c codePoint > 16r7F ifTrue:[
+ aWriteStream nextPutAll:'&#'.
+ c codePoint printOn:aWriteStream.
+ aWriteStream nextPut:$;.
+ ] ifFalse:[
+ aWriteStream nextPut:c.
+ ]
+ ]
+ ].
+
+ "
+ self escapeCharacterEntities:'a<b'
+ self escapeCharacterEntities:'aöb'
+ "
+
+ "Created: / 05-02-2017 / 17:58:34 / cg"
+ "Modified: / 17-02-2017 / 10:34:20 / stefan"
+!
+
+escapeCharacterEntities:aString on:aStream
+ "helper to escape invalid/dangerous characters in html strings.
+ These are:
+ control characters, '<', '>', '&' and space -> %XX ascii as hex digits
+ % -> %%
+ "
+ "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
+ "/ we should refactor this into one method only (can we do hex escapes always ?).
+ "/ Notice, that these two methods came into existance due to historic reasons
+ "/ and were developed independent of each other, but later moved to this common place.
+
+
+ ^self escapeCharacterEntities:aString andControlCharacters:self controlCharacters on:aStream
+
+ "
+ self escapeCharacterEntities:'a<b'
+ self escapeCharacterEntities:'aöb'
+ "
+
+ "Created: / 05-02-2017 / 18:00:56 / cg"
!
extractCharSetEncodingFromContentType:contentTypeLine
@@ -208,13 +729,24 @@
"
!
+htmlEntityForCharacter:aCharacter
+ aCharacter == Character space ifTrue:[^ nil].
+ aCharacter isLetterOrDigit ifTrue:[^ nil].
+
+ ^ self ampersandEscapes
+ keyAtValue:aCharacter
+ ifAbsent:nil
+
+ "Modified: / 01-04-2019 / 14:36:25 / Claus Gittinger"
+!
+
unEscape:aString
"Convert escaped characters in an urls arguments or post fields back to their proper characters.
- Undoes the effect of urlEncode and urlEncode2.
+ Undoes the effect of #urlEncoded: and #urlEncoded2:.
These are:
+ -> space
%XX ascii as hex digits
- %uXXXX unicode as hex digits
+ %uXXXX unicode as hex digits NOTE: %u is non-standard bit implemented in MS IIS
%% -> %
"
@@ -276,6 +808,7 @@
"Modified: / 09-01-2011 / 10:44:50 / cg"
"Modified (comment): / 06-05-2015 / 15:40:04 / sr"
+ "Modified (comment): / 03-02-2017 / 17:06:32 / stefan"
!
unescapeCharacterEntities:aString
@@ -345,7 +878,7 @@
htmlEntityMatchingFailed ifTrue:[
ws nextPut:c.
ws nextPutAll:entity.
- ws nextPutAll:$;.
+ ws nextPut:$;.
].
] ifFalse:[
ws nextPut:c.
@@ -374,9 +907,35 @@
"Created: / 06-05-2015 / 16:56:14 / sr"
"Modified: / 18-05-2015 / 12:13:35 / sr"
+ "Modified: / 17-02-2017 / 10:18:35 / stefan"
+!
+
+urlDecoded:aString
+ "Convert escaped characters in an urls arguments or post fields back to their proper characters.
+ Undoes the effect of #urlEncoded: and #urlEncoded2:.
+ These are:
+ + -> space
+ %XX ascii as hex digits
+ %uXXXX unicode as hex digits NOTE: %u is non-standard bit implemented in MS IIS
+ %% -> %
+ "
+ ^ (self unEscape:aString) utf8Decoded
+
+ "
+ self urlDecoded:'a%20b'
+ self urlDecoded:'a%%b'
+ self urlDecoded:'a+b'
+ self urlDecoded:'a%+b'
+ self urlDecoded:'a%'
+ self urlDecoded:'a%2'
+ self urlDecoded:'/Home/a%C3%A4%C3%B6%C3%BCa'
+ "
+
+ "Created: / 26-08-2018 / 12:49:24 / Claus Gittinger"
!
urlEncode2:aStringOrStream on:ws
+ <resource: #obsolete>
"helper to escape invalid/dangerous characters in an urls arguments.
Similar to urlEncode, but treats '*','~' and spaces differently.
(some clients, such as bitTorrent seem to require this - time will tell...)
@@ -411,45 +970,60 @@
!
urlEncode:aStringOrStream on:ws
- "helper to escape invalid/dangerous characters in an urls arguments or post-fields.
- Similar to urlEncode2, but treats '*','~' and spaces differently.
- (some clients, such as bitTorrent seem to require urlEncode2 - time will tell...)
- Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '*', is encoded using
- the '%nn' format, where nn is the hexadecimal value of the byte.
+ "helper to escape invalid/dangerous characters in an urlÄs argument or post-fields.
+
+ Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~',
+ is encoded using the '%nn' format, where nn is the hexadecimal value of the byte.
+ Characters outside the ASCII range are encoded into utf8 first.
Spaces are encoded as '+'.
see: application/x-www-form-urlencoded
- see: RFC1738"
+ see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)"
- |rs c cp space|
+ |rs c|
- space := Character space.
rs := aStringOrStream readStream.
- [rs atEnd] whileFalse: [
- c := rs next.
+ [(c := rs nextOrNil) notNil] whileTrue: [
+ |cp|
- (c isLetterOrDigit or:[ '-_.*' includes:c ]) ifTrue:[
+ (c isLetterOrDigit or:['-_.~' includes:c]) ifTrue:[
ws nextPut:c.
] ifFalse:[
- c == space ifTrue:[
+ c == Character space ifTrue:[
ws nextPut:$+.
] ifFalse:[
- ws nextPut: $%.
- (cp := c codePoint) > 16rFF ifTrue:[
- ws nextPut: $u.
- cp printOn:ws base:16 size:4 fill:$0.
+ cp := c codePoint.
+ cp > 16r7F ifTrue:[
+ c utf8Encoded do:[:eachUtf8Char|
+ ws nextPut: $%.
+ eachUtf8Char codePoint printOn:ws base:16 size:2 fill:$0.
+ ].
] ifFalse:[
+ ws nextPut: $%.
cp printOn:ws base:16 size:2 fill:$0.
].
].
].
].
+ "
+ self urlEncoded:'hokus pokus fidibus*-/~'
+ self urlEncoded:'Ützel Brötzel*-/~'
+ self urlEncoded:'χαιρε'
+
+ self urlDecoded:(self urlEncoded:'hokus pokus fidibus*-/~')
+ self urlDecoded:(self urlEncoded:'Ützel Brötzel*-/~')
+ self urlDecoded:(self urlEncoded:'χαιρε')
+ "
+
"Modified: / 09-01-2011 / 10:43:30 / cg"
"Modified: / 06-05-2015 / 16:06:52 / sr"
+ "Modified (comment): / 07-02-2017 / 14:51:42 / stefan"
+ "Modified (comment): / 26-08-2018 / 12:50:04 / Claus Gittinger"
!
urlEncoded2: aString
+ <resource: #obsolete>
"helper to escape invalid/dangerous characters in an urls arguments or post-fields.
Similar to urlEncoded, but treats '*','~' and spaces differently.
(some clients, such as bitTorrent seem to require this - time will tell...)
@@ -479,17 +1053,17 @@
urlEncoded: aString
"helper to escape invalid/dangerous characters in an urls arguments or post-fields.
- Similar to urlEncoded2, but treats '*','~' and spaces differently.
- (some clients, such as bitTorrent seem to require urlEncoded2 - time will tell...)
- Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '*', is encoded using
+
+ Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using
the '%nn' format, where nn is the hexadecimal value of the byte.
+ Characters outside the ASCII range are encoded into utf8 first.
Spaces are encoded as '+'.
see: application/x-www-form-urlencoded
- see: RFC1738"
+ see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)"
|ws|
- ws := String writeStreamWithInitialSize:aString size.
+ ws := WriteStream on:(String new:aString size + 20).
self urlEncode:aString on:ws.
^ ws contents
@@ -503,6 +1077,7 @@
"
"Modified: / 09-01-2011 / 10:43:37 / cg"
+ "Modified: / 07-02-2017 / 14:54:12 / stefan"
!
withAllSpecialHTMLCharactersEscaped:aStringOrCharacter
@@ -661,7 +1236,6 @@
|parser doc s first|
-
parser := HTMLParser new.
doc := parser parseText:htmlString.
s := CharacterWriteStream on:(String new:100).
@@ -687,8 +1261,8 @@
"
self plainTextOfHTML:'
-bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6
-'
+ bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6'
+ self plainTextOfHTML:'Hello World'
"
"Modified: / 06-05-2015 / 17:02:36 / sr"