|
1 " |
|
2 COPYRIGHT (c) 2007 by eXept Software AG |
|
3 All Rights Reserved |
|
4 |
|
5 This software is furnished under a license and may be used |
|
6 only in accordance with the terms of that license and with the |
|
7 inclusion of the above copyright notice. This software may not |
|
8 be provided or otherwise made available to, or used by, any |
|
9 other person. No title to or ownership of the software is |
|
10 hereby transferred. |
|
11 " |
|
12 "{ Package: 'stx:libbasic2' }" |
|
13 |
|
"Class definition: HTMLUtilities has no instance variables; its helpers are
 class-side methods. The class variable EscapeControlCharacters caches the
 character -> replacement table that #controlCharacters builds on first use."
Object subclass:#HTMLUtilities
    instanceVariableNames:''
    classVariableNames:'EscapeControlCharacters'
    poolDictionaries:''
    category:'Net-Communication-Support'
!
|
20 |
|
21 !HTMLUtilities class methodsFor:'documentation'! |
|
22 |
|
copyright
"
 COPYRIGHT (c) 2007 by eXept Software AG
 All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice. This software may not
 be provided or otherwise made available to, or used by, any
 other person. No title to or ownership of the software is
 hereby transferred.
"
!
|
36 |
|
documentation
"
    Support functions for dealing with HTML:
    character-entity escaping, url/post-field escaping and unescaping,
    and picking the mime type and charset out of a content-type line.

    These helpers are shared by HTML generators (DocGenerator), HTML parsers
    and the web server, which is why the class has been put into libbasic2.
"
! !
|
44 |
|
45 !HTMLUtilities class methodsFor:'helpers'! |
|
46 |
|
controlCharacters
    "Answer the dictionary which maps each character that must not appear
     verbatim in HTML text to the character entity reference replacing it.
     The table is built on the first call and cached in the class variable
     EscapeControlCharacters; later calls answer the cached dictionary."

    |table|

    EscapeControlCharacters isNil ifTrue:[
        "/ fill a local dictionary and assign it only once it is complete
        table := Dictionary new.
        "/ table at:Character space put:'&nbsp;'.
        table at:$< put:'&lt;'.
        table at:$> put:'&gt;'.
        table at:$& put:'&amp;'.
        table at:$" put:'&quot;'.
        EscapeControlCharacters := table.
    ].
    ^ EscapeControlCharacters

    "
     self controlCharacters at:$<
     self controlCharacters at:$&
    "
!
|
59 |
|
escapeCharacterEntities:aString
    "Answer a string with the contents of aString made safe for embedding
     in HTML text:
        - a character found in the table answered by #controlCharacters
          is replaced by the replacement string stored there;
        - a character with a code point above 16r7F is written as a decimal
          numeric character reference (ampersand, hash, code point, semicolon);
        - every other character is copied unchanged."

    |entityTable in out ch replacement code|

    entityTable := self controlCharacters.
    in := ReadStream on:aString.
    out := WriteStream on:''.
    [in atEnd] whileFalse:[
        ch := in next.
        replacement := entityTable at:ch ifAbsent:[nil].
        replacement isNil ifTrue:[
            code := ch codePoint.
            code <= 16r7F ifTrue:[
                out nextPut:ch
            ] ifFalse:[
                out nextPutAll:'&#'.
                out nextPutAll:code printString.
                out nextPutAll:';'
            ]
        ] ifFalse:[
            out nextPutAll:replacement
        ]
    ].
    ^ out contents

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "
!
|
95 |
|
extractCharSetEncodingFromContentType:contentTypeLine
    "Answer the value of the charset parameter found in contentTypeLine.
     Answers nil if contentTypeLine is nil or contains no charset parameter,
     and an empty string if the parameter is present but has no value.

     The parameter name is matched case-insensitively; the value is answered
     as written. An unquoted value ends at the first whitespace character or
     semicolon, whichever comes first. A value enclosed in double quotes is
     answered without the quotes (and may be empty)."

    |idx rest endIdx|

    contentTypeLine isNil ifTrue:[^ nil].

    "/ search a lowercased copy, but cut the value out of the original line
    idx := contentTypeLine asLowercase findString:'charset='.
    idx == 0 ifTrue:[^ nil].

    rest := (contentTypeLine copyFrom:idx + 'charset=' size) withoutSeparators.
    rest isEmpty ifTrue:[^ rest].

    rest first == $" ifTrue:[
        "/ start looking right behind the opening quote, so that an empty quoted value works
        endIdx := rest indexOf:$" startingAt:2.
        endIdx ~~ 0 ifTrue:[
            ^ rest copyFrom:2 to:endIdx - 1
        ].
        "/ unterminated quote: drop the opening quote and treat the rest as unquoted
        rest := rest copyFrom:2.
    ].

    "/ a single scan for either delimiter; combining two separate searches with min:
    "/ would answer 0 (not found) whenever only one of the delimiters is present
    endIdx := rest findFirst:[:ch | ch isSeparator or:[ch == $;]].
    endIdx == 0 ifTrue:[^ rest].
    ^ rest copyTo:endIdx - 1

    "
     self extractCharSetEncodingFromContentType:'text/html; charset=ascii'
     self extractCharSetEncodingFromContentType:'text/html; charset='
     self extractCharSetEncodingFromContentType:'text/html; fooBar=bla'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8; bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8;bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; Charset=UTF-8'
    "
!
|
123 |
|
extractMimeTypeFromContentType:contentTypeLine
    "Answer the mime type named in contentTypeLine, without any parameters
     and without surrounding whitespace.
     contentTypeLine may be a bare content type, or a line in which the type
     is preceded by a header name and a colon; everything up to and including
     that colon is dropped."

    |semicolonIdx typePart colonIdx|

    "/ cut the parameters off first: a colon inside a parameter value
    "/ (for example in a multipart boundary) must not be mistaken
    "/ for the colon which ends the header name
    semicolonIdx := contentTypeLine indexOf:$;.
    semicolonIdx == 0 ifTrue:[
        typePart := contentTypeLine
    ] ifFalse:[
        typePart := contentTypeLine copyTo:semicolonIdx - 1
    ].

    "/ colonIdx is 0 if there is no header name, so the copy starts at 1
    colonIdx := typePart indexOf:$:.
    ^ (typePart copyFrom:colonIdx + 1) withoutSeparators

    "
     self extractMimeTypeFromContentType:'text/html; charset=ascii'
     self extractMimeTypeFromContentType:'text/html; '
     self extractMimeTypeFromContentType:'text/html'
     self extractMimeTypeFromContentType:'text/xml; charset=utf-8'
     self extractMimeTypeFromContentType:'Content-Type: text/html ; charset=ascii'
     self extractMimeTypeFromContentType:'multipart/form-data; boundary=a:b'
    "
!
|
144 |
|
unEscape:aString
    "Convert escaped characters in an url's arguments or post fields
     to their proper characters. These are:
        +       -> space
        %XX     -> the character whose code is given by the two hex digits XX
        %c      -> c, for any other character c (so %% -> % and %+ -> +)
     A percent character which is the very last character of aString is
     copied unchanged. If aString contains neither + nor %, aString itself
     is answered (not a copy)."

    |out size idx c|

    (aString indexOfAny:'+%') == 0 ifTrue:[
        ^ aString
    ].

    size := aString size.
    out := WriteStream on:''.
    idx := 1.
    [idx <= size] whileTrue:[
        c := aString at:idx.
        idx := idx + 1.
        c == $+ ifTrue:[
            c := Character space
        ] ifFalse:[
            "/ a trailing percent has nothing to escape and is kept as is
            (c == $% and:[idx <= size]) ifTrue:[
                "/ decode only if both hex digits are really there; otherwise
                "/ the character following the percent is taken literally
                (idx < size
                  and:[(aString at:idx) isHexDigit
                  and:[(aString at:idx + 1) isHexDigit]])
                ifTrue:[
                    c := (Integer readFrom:(aString copyFrom:idx to:idx + 1) radix:16) asCharacter.
                    idx := idx + 2
                ] ifFalse:[
                    c := aString at:idx.
                    idx := idx + 1
                ]
            ]
        ].
        out nextPut:c
    ].
    ^ out contents

    "
     self unEscape:'a%20b'
     self unEscape:'a%%b'
     self unEscape:'a+b'
     self unEscape:'a%+b'
     self unEscape:'100%'
    "
!
|
187 |
|
urlEncoded:aString
    "Answer aString encoded for use in an url's arguments or in post-fields
     (see: application/x-www-form-urlencoded):
        ASCII letters and digits, and - _ . *   -> copied unchanged
        space                                   -> +
        every other character                   -> % followed by its code point in hex,
                                                   padded to at least two digits

     NOTE(review): a code point above 16rFF is written with more than two
     hex digits, which #unEscape: cannot decode; confirm whether such input
     is expected to be byte-encoded (e.g. as utf-8) by the caller first."

    |in out c code|

    in := ReadStream on:aString.
    out := WriteStream on:''.

    [in atEnd] whileFalse:[
        c := in next.
        code := c codePoint.
        c == Character space ifTrue:[
            out nextPut:$+
        ] ifFalse:[
            "/ the code point check keeps non-ASCII letters from passing through unencoded
            (code < 16r80 and:[c isLetterOrDigit or:['-_.*' includes:c]]) ifTrue:[
                out nextPut:c
            ] ifFalse:[
                out nextPut:$%.
                "/ an escape needs two hex digits; pad small code points with a zero
                code < 16r10 ifTrue:[out nextPut:$0].
                code printOn:out base:16
            ]
        ]
    ].
    ^ out contents

    "
     self urlEncoded:'_-.*Frankfurt(Main) Hbf'
     self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')

     self urlEncoded:'-_.*%exept;'
     self unEscape:(self urlEncoded:'-_.*%exept;')
    "
! !
|
235 |
|
236 !HTMLUtilities class methodsFor:'serving-helpers'! |
|
237 |
|
escape:aString
    "Answer aString with invalid/dangerous characters escaped, for use in
     an url's arguments or in post-fields. The escapes are:
        %                                       -> %%
        control characters, + ; ? & and space,
        and every character above 16r7E         -> % followed by the code point in hex,
                                                   padded to at least two digits
     Every other character is copied unchanged."

    |in out c code|

    in := ReadStream on:aString.
    out := WriteStream on:''.
    [in atEnd] whileFalse:[
        c := in next.
        c == $% ifTrue:[
            out nextPutAll:'%%'
        ] ifFalse:[
            code := c codePoint.
            "/ only printable ASCII may pass; control characters (below 16r20) must be escaped too
            ((code between:16r20 and:16r7E) and:[('+;?& ' includes:c) not]) ifTrue:[
                out nextPut:c
            ] ifFalse:[
                out nextPut:$%.
                "/ an escape needs two hex digits; pad small code points with a zero
                code < 16r10 ifTrue:[out nextPut:$0].
                code printOn:out base:16
            ]
        ]
    ].
    ^ out contents

    "
     self escape:'a b'
     self escape:'a%b'
     self escape:'a+b'
     self escape:'aäüöb'
    "
! !
|
273 |
|
274 !HTMLUtilities class methodsFor:'documentation'! |
|
275 |
|
version
    "Answer the CVS header string (file, revision, date, author) of this class's source file."

    ^ '$Header: /cvs/stx/stx/libbasic2/HTMLUtilities.st,v 1.1 2008-12-03 12:23:06 cg Exp $'
! !