2058
|
1 |
"
|
|
2 |
COPYRIGHT (c) 2007 by eXept Software AG
|
|
3 |
All Rights Reserved
|
|
4 |
|
|
5 |
This software is furnished under a license and may be used
|
|
6 |
only in accordance with the terms of that license and with the
|
|
7 |
inclusion of the above copyright notice. This software may not
|
|
8 |
be provided or otherwise made available to, or used by, any
|
|
9 |
other person. No title to or ownership of the software is
|
|
10 |
hereby transferred.
|
|
11 |
"
|
|
12 |
"{ Package: 'stx:libbasic2' }"
|
|
13 |
|
|
14 |
Object subclass:#HTMLUtilities
|
|
15 |
instanceVariableNames:''
|
|
16 |
classVariableNames:'EscapeControlCharacters'
|
|
17 |
poolDictionaries:''
|
|
18 |
category:'Net-Communication-Support'
|
|
19 |
!
|
|
20 |
|
|
21 |
!HTMLUtilities class methodsFor:'documentation'!
|
|
22 |
|
|
23 |
copyright
|
|
24 |
"
|
|
25 |
COPYRIGHT (c) 2007 by eXept Software AG
|
|
26 |
All Rights Reserved
|
|
27 |
|
|
28 |
This software is furnished under a license and may be used
|
|
29 |
only in accordance with the terms of that license and with the
|
|
30 |
inclusion of the above copyright notice. This software may not
|
|
31 |
be provided or otherwise made available to, or used by, any
|
|
32 |
other person. No title to or ownership of the software is
|
|
33 |
hereby transferred.
|
|
34 |
"
|
|
35 |
!
|
|
36 |
|
|
37 |
documentation
|
|
38 |
"
|
|
39 |
Collected support functions to deal with HTML.
|
|
40 |
Used both by HTML generators (DocGenerator), HTMLParsers and the webServer.
|
|
41 |
Therefore, it has been put into libbasic2.
|
|
42 |
"
|
|
43 |
! !
|
|
44 |
|
|
45 |
!HTMLUtilities class methodsFor:'helpers'!
|
|
46 |
|
|
47 |
controlCharacters
|
|
48 |
|
|
49 |
EscapeControlCharacters isNil ifTrue:[
|
|
50 |
EscapeControlCharacters := Dictionary new.
|
|
51 |
"/ EscapeControlCharacters at:Character space put:' '.
|
|
52 |
EscapeControlCharacters at:$< put:'<'.
|
|
53 |
EscapeControlCharacters at:$> put:'>'.
|
|
54 |
EscapeControlCharacters at:$& put:'&'.
|
|
55 |
EscapeControlCharacters at:$" put:'"'.
|
|
56 |
].
|
|
57 |
^ EscapeControlCharacters.
|
|
58 |
!
|
|
59 |
|
|
60 |
escapeCharacterEntities:aString
|
|
61 |
"helper to escape invalid/dangerous characters in html strings.
|
|
62 |
These are:
|
|
63 |
control characters, '<', '>', '&' and space -> %XX ascii as hex digits
|
|
64 |
% -> %%
|
|
65 |
"
|
|
66 |
|rs ws c controlCharacters controlString|
|
|
67 |
|
|
68 |
controlCharacters := self controlCharacters.
|
|
69 |
rs := ReadStream on: aString.
|
|
70 |
ws := WriteStream on: ''.
|
|
71 |
[ rs atEnd ] whileFalse: [
|
|
72 |
c := rs next.
|
|
73 |
controlString := controlCharacters at:c ifAbsent:nil.
|
|
74 |
controlString notNil ifTrue:[
|
|
75 |
ws nextPutAll:controlString.
|
|
76 |
] ifFalse:[
|
|
77 |
c codePoint > 16r7F ifTrue:[
|
|
78 |
ws
|
|
79 |
nextPutAll:'&#';
|
|
80 |
nextPutAll:(c codePoint printString);
|
|
81 |
nextPutAll:';'.
|
|
82 |
] ifFalse:[
|
|
83 |
ws nextPut:c.
|
|
84 |
]
|
|
85 |
]
|
|
86 |
].
|
|
87 |
^ ws contents
|
|
88 |
|
|
89 |
|
|
90 |
"
|
|
91 |
self escapeCharacterEntities:'a<b'
|
|
92 |
self escapeCharacterEntities:'aöb'
|
|
93 |
"
|
|
94 |
!
|
|
95 |
|
|
96 |
extractCharSetEncodingFromContentType:contentTypeLine
|
|
97 |
|idx rest encoding|
|
|
98 |
|
|
99 |
idx := contentTypeLine findString:'charset='.
|
|
100 |
idx == 0 ifTrue:[
|
|
101 |
^ nil
|
|
102 |
].
|
|
103 |
rest := (contentTypeLine copyFrom:idx+'charset=' size) withoutSeparators.
|
|
104 |
idx := (rest indexOfSeparator) min:(rest indexOf:$;).
|
|
105 |
idx == 0 ifTrue:[
|
|
106 |
encoding := rest
|
|
107 |
] ifFalse:[
|
|
108 |
encoding := rest copyTo:idx-1.
|
|
109 |
].
|
|
110 |
(encoding startsWith:$") ifTrue:[
|
|
111 |
encoding := encoding copyFrom:2 to:(encoding indexOf:$" startingAt:3)-1.
|
|
112 |
].
|
|
113 |
^ encoding.
|
|
114 |
|
|
115 |
"
|
|
116 |
self extractCharSetEncodingFromContentType:'text/html; charset=ascii'
|
|
117 |
self extractCharSetEncodingFromContentType:'text/html; charset='
|
|
118 |
self extractCharSetEncodingFromContentType:'text/html; fooBar=bla'
|
|
119 |
self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8'
|
|
120 |
self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8; bla=fasel'
|
|
121 |
"
|
|
122 |
!
|
|
123 |
|
|
124 |
extractMimeTypeFromContentType:contentTypeLine
|
|
125 |
|idx mimeAndEncoding|
|
|
126 |
|
|
127 |
idx := contentTypeLine indexOf:$:.
|
|
128 |
mimeAndEncoding := (contentTypeLine copyFrom:idx+1) withoutSeparators.
|
|
129 |
|
|
130 |
(mimeAndEncoding includes:$;) ifFalse:[
|
|
131 |
^ mimeAndEncoding
|
|
132 |
].
|
|
133 |
|
|
134 |
idx := mimeAndEncoding indexOf:$;.
|
|
135 |
^ mimeAndEncoding copyTo:idx-1
|
|
136 |
|
|
137 |
"
|
|
138 |
self extractMimeTypeFromContentType:'text/html; charset=ascii'
|
|
139 |
self extractMimeTypeFromContentType:'text/html; '
|
|
140 |
self extractMimeTypeFromContentType:'text/html'
|
|
141 |
self extractMimeTypeFromContentType:'text/xml; charset=utf-8'
|
|
142 |
"
|
|
143 |
!
|
|
144 |
|
|
145 |
unEscape:aString
|
|
146 |
"Convert escaped characters in an urls arguments or post fields to their proper characters.
|
|
147 |
These are:
|
|
148 |
+ -> space
|
|
149 |
%XX ascii as hex digits
|
|
150 |
%% -> %
|
|
151 |
"
|
|
152 |
|
|
153 |
|rs ws c peekC|
|
|
154 |
|
|
155 |
(aString indexOfAny:'+%') == 0 ifTrue:[
|
|
156 |
^ aString
|
|
157 |
].
|
|
158 |
|
|
159 |
rs := ReadStream on: aString.
|
|
160 |
ws := WriteStream on: ''.
|
|
161 |
[rs atEnd] whileFalse:[
|
|
162 |
c := rs next.
|
|
163 |
c == $+
|
|
164 |
ifTrue:[ c := Character space ]
|
|
165 |
ifFalse:[
|
|
166 |
c == $%
|
|
167 |
ifTrue: [
|
|
168 |
peekC := rs peek.
|
|
169 |
(peekC notNil and:[peekC isHexDigit]) ifTrue:[
|
|
170 |
c := (Integer readFrom:(rs next:2) radix:16) asCharacter
|
|
171 |
] ifFalse:[
|
|
172 |
c := rs next.
|
|
173 |
]
|
|
174 |
]
|
|
175 |
].
|
|
176 |
ws nextPut: c.
|
|
177 |
].
|
|
178 |
^ ws contents
|
|
179 |
|
|
180 |
"
|
|
181 |
self new unEscape:'a%20b'
|
|
182 |
self new unEscape:'a%%b'
|
|
183 |
self new unEscape:'a+b'
|
|
184 |
self new unEscape:'a%+b'
|
|
185 |
"
|
|
186 |
!
|
|
187 |
|
|
188 |
urlEncoded: aString
|
|
189 |
"helper to escape invalid/dangerous characters in an urls arguments or post-fields.
|
|
190 |
see: application/x-www-form-urlencoded
|
|
191 |
"
|
|
192 |
|
|
193 |
"
|
|
194 |
self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')
|
|
195 |
self urlEncoded:'_-.*Frankfurt(Main) Hbf') unescape
|
|
196 |
|
|
197 |
self unEscape:(self urlEncoded:'-_.*%exept;')
|
|
198 |
self urlEncoded:'-_.*%exept;'
|
|
199 |
"
|
|
200 |
|
|
201 |
| rs ws c space|
|
|
202 |
|
|
203 |
space := Character space.
|
|
204 |
rs := ReadStream on: aString.
|
|
205 |
ws := WriteStream on: ''.
|
|
206 |
|
|
207 |
[ rs atEnd ] whileFalse: [
|
|
208 |
c := rs next.
|
|
209 |
|
|
210 |
c isLetterOrDigit ifTrue:[
|
|
211 |
ws nextPut:c.
|
|
212 |
] ifFalse:[
|
|
213 |
c == space ifTrue:[
|
|
214 |
ws nextPut:$+.
|
|
215 |
] ifFalse:[
|
|
216 |
('-_.*' includes:c) ifTrue:[
|
|
217 |
ws nextPut:c.
|
|
218 |
] ifFalse:[
|
|
219 |
ws nextPut: $%.
|
|
220 |
c codePoint printOn:ws base:16.
|
|
221 |
].
|
|
222 |
].
|
|
223 |
].
|
|
224 |
].
|
|
225 |
^ ws contents
|
|
226 |
|
|
227 |
|
|
228 |
"
|
|
229 |
self new escape:'a b'
|
|
230 |
self new escape:'a%b'
|
|
231 |
self new escape:'a b'
|
|
232 |
self new escape:'a+b'
|
|
233 |
"
|
|
234 |
! !
|
|
235 |
|
|
236 |
!HTMLUtilities class methodsFor:'serving-helpers'!
|
|
237 |
|
|
238 |
escape:aString
|
|
239 |
"helper to escape invalid/dangerous characters in an urls arguments or post-fields.
|
|
240 |
These are:
|
|
241 |
control characters, '+', ';', '?', '&' and space -> %XX ascii as hex digits
|
|
242 |
% -> %%
|
|
243 |
"
|
|
244 |
|
|
245 |
| rs ws c |
|
|
246 |
|
|
247 |
rs := ReadStream on: aString.
|
|
248 |
ws := WriteStream on: ''.
|
|
249 |
[ rs atEnd ] whileFalse: [
|
|
250 |
c := rs next.
|
|
251 |
c == $% ifTrue:[
|
|
252 |
ws nextPutAll: '%%'.
|
|
253 |
] ifFalse:[
|
|
254 |
((c codePoint < 16r7F)
|
|
255 |
and:[ ('+;?& ' includes:c) not ]) ifTrue: [
|
|
256 |
ws nextPut: c.
|
|
257 |
] ifFalse:[
|
|
258 |
ws nextPut: $%.
|
|
259 |
c codePoint printOn:ws base:16.
|
|
260 |
]
|
|
261 |
]
|
|
262 |
].
|
|
263 |
^ ws contents
|
|
264 |
|
|
265 |
"
|
|
266 |
self escape:'a b'
|
|
267 |
self escape:'a%b'
|
|
268 |
self escape:'a b'
|
|
269 |
self escape:'a+b'
|
|
270 |
self escape:'aäüöb'
|
|
271 |
"
|
|
272 |
! !
|
|
273 |
|
|
274 |
!HTMLUtilities class methodsFor:'documentation'!
|
|
275 |
|
|
276 |
version
|
|
277 |
^ '$Header: /cvs/stx/stx/libbasic2/HTMLUtilities.st,v 1.1 2008-12-03 12:23:06 cg Exp $'
|
|
278 |
! !
|