HTMLUtilities.st
author sr
Wed, 08 Jul 2009 12:40:24 +0200
changeset 2179 c1cee8bbc1e5
parent 2144 c89258333f4d
child 2434 5625df4b6119
permissions -rw-r--r--
unescape: care for invalid escape sequence (%, %singleDigit atEnd)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
"{ Package: 'stx:libbasic2' }"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"Class definition: HTMLUtilities declares no instance variables. Its single class variable,
 EscapeControlCharacters, holds the character -> entity-string dictionary which is
 created on first use by the class method #controlCharacters."
Object subclass:#HTMLUtilities
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
	instanceVariableNames:''
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
	classVariableNames:'EscapeControlCharacters'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    17
	poolDictionaries:''
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
	category:'Net-Communication-Support'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
copyright
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
documentation
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
    Collected support functions to deal with HTML.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
    Used by HTML generators (DocGenerator), HTMLParsers and the webServer.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
    Therefore, it has been put into libbasic2.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
!HTMLUtilities class methodsFor:'helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
controlCharacters
    "return the dictionary which maps each character that has a special meaning
     in HTML to the entity string replacing it.
     The dictionary is created on the first call and kept in the class variable
     EscapeControlCharacters; later calls return that same dictionary."

    |entities|

    EscapeControlCharacters notNil ifTrue:[
        ^ EscapeControlCharacters
    ].

    entities := Dictionary new.
    "/ the mapping of a space is disabled:
    "/    entities at:Character space put:'&nbsp;'.
    entities
        at:$< put:'&lt;';
        at:$> put:'&gt;';
        at:$& put:'&amp;';
        at:$" put:'&quot;'.
    EscapeControlCharacters := entities.
    ^ entities
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    58
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    59
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    60
escapeCharacterEntities:aString
    "helper to escape invalid/dangerous characters in html strings.
     The replacements are:
        every character found in #controlCharacters
        (less-than, greater-than, ampersand and the double quote)
                -> its named entity ('&lt;', '&gt;', '&amp;', '&quot;')
        every character with a codePoint above 16r7F
                -> a numeric character reference '&#NNN;' (NNN = decimal codePoint)
     All other characters (including space and '%') are copied unchanged.
     Always returns a new string."

    "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independently of each other, but later moved to this common place.

    |entities out|

    entities := self controlCharacters.
    out := WriteStream on:''.
    aString do:[:ch |
        |replacement|

        "/ a real block as absent-handler; does not depend on nil understanding #value
        replacement := entities at:ch ifAbsent:[nil].
        replacement notNil ifTrue:[
            out nextPutAll:replacement
        ] ifFalse:[
            ch codePoint > 16r7F ifTrue:[
                out
                    nextPutAll:'&#';
                    nextPutAll:(ch codePoint printString);
                    nextPutAll:';'
            ] ifFalse:[
                out nextPut:ch
            ]
        ]
    ].
    ^ out contents

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
extractCharSetEncodingFromContentType:contentTypeLine
    "extract the value of the charset parameter from a content-type line
     such as 'text/html; charset=utf-8'.
     Returns:
        nil             if the line contains no 'charset=' (compared ignoring case),
        an empty string if nothing follows the 'charset=',
        otherwise the value up to (excluding) the next whitespace or semicolon.
     A value enclosed in double quotes is returned without the quotes."

    |key idx rest endIdx|

    key := 'charset='.
    "/ parameter names are case-insensitive; lowercasing keeps the indices valid
    idx := contentTypeLine asLowercase findString:key.
    idx == 0 ifTrue:[
        ^ nil
    ].
    rest := (contentTypeLine copyFrom:idx + key size) withoutSeparators.

    (rest size > 0 and:[(rest at:1) == $"]) ifTrue:[
        "/ search from 2, so that an empty quoted value is handled as well
        endIdx := rest indexOf:$" startingAt:2.
        endIdx ~~ 0 ifTrue:[
            ^ rest copyFrom:2 to:endIdx - 1
        ].
        "/ no closing quote: drop the opening one and continue as for a plain value
        rest := rest copyFrom:2.
    ].

    "/ the value ends at whichever delimiter comes first. The two indices must not
    "/ be combined with min:, because 0 (not found) would win over a real index.
    endIdx := rest findFirst:[:ch | ch isSeparator or:[ch == $;]].
    endIdx == 0 ifTrue:[
        ^ rest
    ].
    ^ rest copyTo:endIdx - 1

    "
     self extractCharSetEncodingFromContentType:'text/html; charset=ascii'
     self extractCharSetEncodingFromContentType:'text/html; charset='
     self extractCharSetEncodingFromContentType:'text/html; fooBar=bla'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8; bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8;bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; Charset=UTF-8'
    "
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   128
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   129
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   130
extractMimeTypeFromContentType:contentTypeLine
    "extract the mime type from a content-type line, i.e. the part in front of
     the first semicolon, without surrounding whitespace.
     Everything up to and including the first colon (a leading header name such
     as 'Content-Type:') is skipped first; a line without a colon is used as a whole."

    |value semiIdx|

    "/ indexOf: is 0 if there is no colon, so the copy then starts at index 1
    value := (contentTypeLine copyFrom:(contentTypeLine indexOf:$:) + 1) withoutSeparators.

    semiIdx := value indexOf:$;.
    semiIdx == 0 ifTrue:[
        ^ value
    ].
    "/ strip again: blanks may precede the semicolon ('text/html ; charset=...')
    ^ (value copyTo:semiIdx - 1) withoutSeparators

    "
     self extractMimeTypeFromContentType:'text/html; charset=ascii'
     self extractMimeTypeFromContentType:'text/html; '
     self extractMimeTypeFromContentType:'text/html'
     self extractMimeTypeFromContentType:'text/xml; charset=utf-8'
     self extractMimeTypeFromContentType:'text/xml ; charset=utf-8'
    "
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   149
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   150
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   151
unEscape:aString
    "Convert escaped characters in an urls arguments or post fields to their proper characters.
     These are:
        + -> space
        %XX ascii as hex digits
        %% -> %
     A string which contains neither '+' nor '%' is returned as is (no copy is made).
     A '%' in front of any other non-hex character is dropped and that character
     is copied literally (so '%+' gives '+', not a space)."

    |in out ch lookahead decoded|

    (aString indexOfAny:'+%') == 0 ifTrue:[
        ^ aString
    ].

    in := ReadStream on:aString.
    out := WriteStream on:''.
    [in atEnd] whileFalse:[
        ch := in next.
        ch == $+ ifTrue:[
            out nextPut:Character space
        ] ifFalse:[
            ch == $% ifTrue:[
                lookahead := in peek.
                (lookahead notNil and:[lookahead isHexDigit]) ifTrue:[
                    "/ nextAvailable: - near the end, less than 2 characters may be left
                    decoded := (Integer readFrom:(in nextAvailable:2) radix:16) asCharacter
                ] ifFalse:[
                    "/ presumably nil if the '%' was the last character - nothing is written then
                    decoded := in next
                ].
                decoded notNil ifTrue:[ out nextPut:decoded ]
            ] ifFalse:[
                ch notNil ifTrue:[ out nextPut:ch ]
            ]
        ]
    ].
    ^ out contents

    "
     self unEscape:'a%20b'
     self unEscape:'a%%b'
     self unEscape:'a+b'
     self unEscape:'a%+b'
     self unEscape:'a%'
     self unEscape:'a%2'
    "

    "Modified: / 08-07-2009 / 12:40:56 / sr"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
urlEncoded: aString
    "helper to escape invalid/dangerous characters in an urls arguments or post-fields.
        see: application/x-www-form-urlencoded
     Letters, digits and the characters '-', '_', '.' and '*' are copied unchanged,
     a space is written as '+', and every other character as '%' followed by
     its codePoint in hex, padded to at least two digits.
     Always returns a new string."

    "
     self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')
     self urlEncoded:'_-.*Frankfurt(Main) Hbf'

     self unEscape:(self urlEncoded:'-_.*%exept;')
     self urlEncoded:'-_.*%exept;'
    "

    | rs ws c space code |

    space := Character space.
    rs := ReadStream on: aString.
    ws := WriteStream on: ''.

    [ rs atEnd ] whileFalse: [
        c := rs next.

        "/ NOTE(review): isLetterOrDigit presumably also answers true for non-ASCII
        "/ letters, which are then copied unescaped - confirm that this is intended.
        (c isLetterOrDigit or:[ '-_.*' includes:c ]) ifTrue:[
            ws nextPut:c.
        ] ifFalse:[
            c == space ifTrue:[
                ws nextPut:$+.
            ] ifFalse:[
                code := c codePoint.
                ws nextPut: $%.
                "/ an escape needs two hex digits. Without the padding, tab/lf/cr came
                "/ out as %9/%A/%D, which is misread by unEscape: if a hex digit follows.
                code < 16r10 ifTrue:[ ws nextPut:$0 ].
                code printOn:ws base:16.
            ].
        ].
    ].
    ^ ws contents

    "
     self urlEncoded:'a b'
     self urlEncoded:'a%b'
     self urlEncoded:'a+b'
     self urlEncoded:(String with:$a with:Character tab with:$b)
    "
2066
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   244
!
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   245
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   246
withSpecialHTMLCharactersEscaped:aStringOrCharacter
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   247
    "replace ampersand, less and greater by html-character escapes"
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   248
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   249
    "/ TODO: this is similar to escapeCharacterEntities.
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   250
    "/ we should refactor this into one method only (can we do hex escapes always ?).
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   251
    "/ Notice, that these two methods came into existance due to historic reasons
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   252
    "/ and were developed independent of each other, but later moved to this common place.
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   253
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   254
    |resultStream orgs repls|
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   255
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   256
"/    orgs  := #( $&      $<     $>     ).
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   257
"/    repls := #( '&amp;' '&lt;' '&gt;' ).
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   258
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   259
    (aStringOrCharacter isString
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   260
    and:[ (aStringOrCharacter includesAny:'&<>') not ]) ifTrue:[^ aStringOrCharacter].
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   261
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   262
    resultStream := WriteStream on:''.
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   263
    aStringOrCharacter asString do:[:eachCharacter |
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   264
        "/ huh - a switch. Sorry, but this method is used heavily.
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   265
        eachCharacter == $&
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   266
            ifTrue:[ resultStream nextPutAll:'&amp;' ]
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   267
            ifFalse:[
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   268
        eachCharacter == $<
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   269
            ifTrue:[ resultStream nextPutAll:'&lt;' ]
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   270
            ifFalse:[
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   271
        eachCharacter == $>
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   272
            ifTrue:[ resultStream nextPutAll:'&gt;' ]
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   273
            ifFalse:[
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   274
                resultStream nextPut:eachCharacter
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   275
            ]]].
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   276
    ].
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   277
    ^ resultStream contents
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   278
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   279
    "
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   280
     self withSpecialHTMLCharactersEscaped:'<>#&'
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   281
     self withSpecialHTMLCharactersEscaped:$<
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   282
     self withSpecialHTMLCharactersEscaped:$#
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   283
    "
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   284
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   285
    "Modified: / 05-12-2006 / 13:48:59 / cg"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   286
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   287
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   288
!HTMLUtilities class methodsFor:'serving-helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   289
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   290
escape:aString
    "helper to escape invalid/dangerous characters in a URL's arguments or post-fields.
     The rules applied to each character of aString are:
        %                                            -> %%
        control characters (code point below 16r20),
        '+', ';', '?', '&', space and every
        character with a code point of 16r7F or more -> % followed by the code point in hex
        any other character                          -> copied unchanged
     The hex code is zero-padded, so that it consists of at least two digits.
     Answers a new string; aString itself is only read."

    | rs ws c cp |

    rs := ReadStream on: aString.
    ws := WriteStream on: ''.
    [ rs atEnd ] whileFalse: [
        c := rs next.
        c == $% ifTrue:[
            ws nextPutAll: '%%'.
        ] ifFalse:[
            cp := c codePoint.
            "/ only printable 7-bit characters which are not in the reserved set
            "/ are passed through; space (16r20) and all control characters below it
            "/ are excluded by the lower bound.
            ((cp > 16r20)
             and:[ (cp < 16r7F)
             and:[ ('+;?& ' includes:c) not ]]) ifTrue: [
                ws nextPut: c.
            ] ifFalse:[
                ws nextPut: $%.
                "/ code points below 16r10 print as a single hex digit;
                "/ pad with a leading zero to get a two-digit escape sequence.
                cp < 16r10 ifTrue:[ ws nextPut: $0 ].
                cp printOn:ws base:16.
                "/ NOTE(review): code points above 16rFF are still written with more
                "/ than two hex digits - confirm that the decoding side can cope with that.
            ]
        ]
    ].
    ^ ws contents

    "
     self escape:'a b'
     self escape:'a%b'
     self escape:'a+b'
     self escape:'a&b'
     self escape:(String with:$a with:Character tab with:$b)
     self escape:'aäüöb'
    "
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   324
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   325
2144
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   326
!HTMLUtilities class methodsFor:'text processing helpers'!
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   327
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   328
plainTextOfHTML:htmlString
    "Answer the raw text found in htmlString, leaving out the markup.
     The string is parsed with an HTMLParser; for every text element of the
     resulting document, its text is sent withoutSeparators and - unless the
     result is empty - appended to the answer, followed by one space.
     Can be used to search for strings in some html text."

    |textStream document|

    textStream := CharacterWriteStream on:(String new:100).
    document := HTMLParser new parseText:htmlString.
    document markUpElementsDo:[:eachElement |
        |fragment|

        "/ non-text elements are ignored; however, we could care for text
        "/ in info-titles or scripts as well...
        eachElement isTextElement ifTrue:[
            fragment := eachElement text withoutSeparators.
            fragment isEmpty ifFalse:[
                textStream nextPutAll:fragment; space.
            ].
        ].
    ].
    ^ textStream contents asSingleByteStringIfPossible

    "
     self plainTextOfHTML:'
bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6
'
    "
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   359
! !
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   360
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   361
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   362
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   363
version
    "Answer the CVS header string (path, revision, date and author)
     which identifies the revision of this class's source file."

    ^ '$Header: /cvs/stx/stx/libbasic2/HTMLUtilities.st,v 1.6 2009-07-08 10:40:24 sr Exp $'
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   365
! !