HTMLUtilities.st
author Claus Gittinger <cg@exept.de>
Wed, 03 Dec 2008 13:23:06 +0100
changeset 2058 f407ff58f780
child 2066 0ee2ef2d018c
permissions -rw-r--r--
initial checkin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
"{ Package: 'stx:libbasic2' }"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
Object subclass:#HTMLUtilities
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
	instanceVariableNames:''
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
	classVariableNames:'EscapeControlCharacters'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
	poolDictionaries:''
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
	category:'Net-Communication-Support'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
copyright
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
documentation
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
    Collected support functions to deal with HTML.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
    Used both by HTML generators (DocGenerator), HTMLParsers and the webServer.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
    Therefore, it has been put into libbasic2.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
!HTMLUtilities class methodsFor:'helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
controlCharacters
    "Answer the dictionary which maps characters that must not appear
     literally in HTML text to the character entity string replacing them.
     The dictionary is built on first use and then kept in the class
     variable EscapeControlCharacters."

    |map|

    EscapeControlCharacters notNil ifTrue:[
        ^ EscapeControlCharacters
    ].

    map := Dictionary new.
    "/ a space is deliberately not mapped (to &nbsp;)
    map
        at:$< put:'&lt;';
        at:$> put:'&gt;';
        at:$& put:'&amp;';
        at:$" put:'&quot;'.
    EscapeControlCharacters := map.
    ^ map
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    59
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    60
escapeCharacterEntities:aString
    "Answer a copy of aString in which characters that are invalid or
     dangerous inside HTML text are replaced by character entities:
        <  >  &  and the double quote  -> named entity (see #controlCharacters)
        any character above 16r7F      -> decimal numeric entity of the form &#NNN;
     All other characters (including the space and the ASCII control
     characters) are copied unchanged.
     Note that this is HTML entity escaping, not URL percent-escaping;
     for the latter see #escape: and #urlEncoded:."

    |entities out|

    entities := self controlCharacters.
    out := WriteStream on:''.
    aString do:[:ch |
        |entity|

        entity := entities at:ch ifAbsent:[nil].
        entity notNil ifTrue:[
            out nextPutAll:entity
        ] ifFalse:[
            ch codePoint > 16r7F ifTrue:[
                "/ outside 7-bit ascii: emit a numeric (decimal) entity
                out
                    nextPutAll:'&#';
                    nextPutAll:ch codePoint printString;
                    nextPutAll:';'
            ] ifFalse:[
                out nextPut:ch
            ]
        ]
    ].
    ^ out contents

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
extractCharSetEncodingFromContentType:contentTypeLine
    "Answer the value of the charset parameter found in contentTypeLine
     (a content-type header line or header value), or nil if there is no
     such parameter. The parameter name is matched case-insensitively.
     The value ends at the first whitespace or semicolon, whichever comes
     first; a value enclosed in double quotes is answered without the quotes.
     An empty value (as in 'text/html; charset=') is answered as an empty string."

    |idx rest encoding closingQuote|

    idx := contentTypeLine asLowercase findString:'charset='.
    idx == 0 ifTrue:[
        ^ nil
    ].
    rest := (contentTypeLine copyFrom:idx + 'charset=' size) withoutSeparators.

    "/ index of the first delimiter; 0 if the value extends to the end.
    "/ (the min: of two separate searches would be 0 - meaning no cut -
    "/  whenever only one kind of delimiter is present)
    idx := rest findFirst:[:ch | ch isSeparator or:[ch == $;]].
    idx == 0 ifTrue:[
        encoding := rest
    ] ifFalse:[
        encoding := rest copyTo:idx - 1
    ].

    (encoding startsWith:$") ifTrue:[
        closingQuote := encoding indexOf:$" startingAt:2.
        closingQuote == 0 ifTrue:[
            "/ unterminated quote: take everything after the opening quote
            encoding := encoding copyFrom:2
        ] ifFalse:[
            encoding := encoding copyFrom:2 to:closingQuote - 1
        ]
    ].
    ^ encoding

    "
     self extractCharSetEncodingFromContentType:'text/html; charset=ascii'
     self extractCharSetEncodingFromContentType:'text/html; charset='
     self extractCharSetEncodingFromContentType:'text/html; fooBar=bla'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8; bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8;bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; Charset=UTF-8'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   123
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   124
extractMimeTypeFromContentType:contentTypeLine
    "Answer the mime type part of contentTypeLine, which may be either a
     complete header line ('Content-Type: text/html; charset=utf-8') or
     just its value ('text/html; charset=utf-8').
     Everything up to and including the first colon is skipped, the
     parameters following the first semicolon are dropped, and whitespace
     around the remaining mime type is removed."

    |idx mimeAndParameters|

    "/ idx is 0 if there is no colon; the copy then starts at the first character
    idx := contentTypeLine indexOf:$:.
    mimeAndParameters := contentTypeLine copyFrom:idx + 1.

    idx := mimeAndParameters indexOf:$;.
    idx == 0 ifFalse:[
        mimeAndParameters := mimeAndParameters copyTo:idx - 1
    ].
    "/ trim last, so that blanks in front of the semicolon do not end up in the answer
    ^ mimeAndParameters withoutSeparators

    "
     self extractMimeTypeFromContentType:'text/html; charset=ascii'
     self extractMimeTypeFromContentType:'text/html; '
     self extractMimeTypeFromContentType:'text/html'
     self extractMimeTypeFromContentType:'text/html ; charset=ascii'
     self extractMimeTypeFromContentType:'text/xml; charset=utf-8'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   145
unEscape:aString
    "Convert escaped characters in an url's arguments or post fields
     back to their proper characters. These are:
        +      -> space
        %XX    -> the character with the hex code XX
        %X     -> the character with the hex code X, if only a single hex
                  digit follows (lenient; #urlEncoded: used to generate
                  such one-digit escapes for codes below 16r10)
        %c     -> c, for any other character c (so %% -> %)
     A single % at the very end of the string is kept as it is.
     If aString contains neither + nor %, aString itself (not a copy)
     is answered."

    |rs ws c code|

    (aString indexOfAny:'+%') == 0 ifTrue:[
        ^ aString
    ].

    rs := ReadStream on:aString.
    ws := WriteStream on:''.
    [rs atEnd] whileFalse:[
        c := rs next.
        c == $+ ifTrue:[
            c := Character space
        ] ifFalse:[
            "/ a trailing % has nothing to escape and is copied literally
            (c == $% and:[rs atEnd not]) ifTrue:[
                rs peek isHexDigit ifTrue:[
                    code := rs next digitValue.
                    "/ only consume a second digit if there really is one;
                    "/ never read beyond the end or swallow a non-digit
                    (rs atEnd not and:[rs peek isHexDigit]) ifTrue:[
                        code := (code * 16) + rs next digitValue
                    ].
                    c := code asCharacter
                ] ifFalse:[
                    c := rs next
                ]
            ]
        ].
        ws nextPut:c
    ].
    ^ ws contents

    "
     self unEscape:'a%20b'
     self unEscape:'a%%b'
     self unEscape:'a+b'
     self unEscape:'a%+b'
     self unEscape:'abc%'
     self unEscape:'a%9'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
urlEncoded:aString
    "Answer aString with invalid/dangerous characters escaped, for use in
     an url's arguments or post-fields
     (see: application/x-www-form-urlencoded):
        ascii letters, digits and - _ . *   are copied unchanged
        space                               -> +
        every other character               -> %XX, its code as hex digits,
                                               zero padded to two digits
     The code point is written as it is (no UTF-8 encoding is done), so a
     character above 16rFF yields more than two hex digits.
     The result can be converted back with #unEscape:."

    |ws code|

    ws := WriteStream on:''.
    aString do:[:c |
        code := c codePoint.
        "/ isLetterOrDigit alone would also let national (non-ascii) letters pass
        ((code < 16r80 and:[c isLetterOrDigit]) or:['-_.*' includes:c]) ifTrue:[
            ws nextPut:c
        ] ifFalse:[
            c == Character space ifTrue:[
                ws nextPut:$+
            ] ifFalse:[
                ws nextPut:$%.
                "/ the decoder expects two hex digits
                code < 16r10 ifTrue:[
                    ws nextPut:$0
                ].
                code printOn:ws base:16
            ]
        ]
    ].
    ^ ws contents

    "
     self urlEncoded:'_-.*Frankfurt(Main) Hbf'
     self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')

     self urlEncoded:'-_.*%exept;'
     self unEscape:(self urlEncoded:'-_.*%exept;')

     self urlEncoded:'a b'
     self urlEncoded:'a%b'
     self urlEncoded:'a+b'
    "
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   235
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   236
!HTMLUtilities class methodsFor:'serving-helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   237
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   238
escape:aString
    "Answer aString with invalid/dangerous characters escaped, for use in
     an url's arguments or post-fields. These are:
        %                                   -> %%
        control characters, space, DEL,
        + ; ? & and anything above 16r7F    -> %XX, the code as hex digits,
                                               zero padded to two digits
     All other (printable ascii) characters are copied unchanged.
     The code point is written as it is (no UTF-8 encoding is done), so a
     character above 16rFF yields more than two hex digits.
     The result can be converted back with #unEscape:."

    |ws code|

    ws := WriteStream on:''.
    aString do:[:c |
        c == $% ifTrue:[
            ws nextPutAll:'%%'
        ] ifFalse:[
            code := c codePoint.
            "/ 16r21 .. 16r7E is printable ascii without the space;
            "/ control characters fall below that range and get escaped
            ((code between:16r21 and:16r7E) and:[('+;?&' includes:c) not]) ifTrue:[
                ws nextPut:c
            ] ifFalse:[
                ws nextPut:$%.
                "/ the decoder expects two hex digits
                code < 16r10 ifTrue:[
                    ws nextPut:$0
                ].
                code printOn:ws base:16
            ]
        ]
    ].
    ^ ws contents

    "
     self escape:'a b'
     self escape:'a%b'
     self escape:'a+b'
     self escape:'aäüöb'
     self unEscape:(self escape:'a b+c%d')
    "
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
version
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   277
    ^ '$Header: /cvs/stx/stx/libbasic2/HTMLUtilities.st,v 1.1 2008-12-03 12:23:06 cg Exp $'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
! !