HTMLUtilities.st
author Claus Gittinger <cg@exept.de>
Wed, 27 Mar 2019 15:22:56 +0100
changeset 4924 b171682381a1
parent 4737 610d483cb00a
child 4929 6220f244a435
permissions -rw-r--r--
#TUNING by cg class: HTMLUtilities class comment/format in: #characterFromHtmlEntityNamed: #htmlEntityToCharacter changed: #htmlEntityForCharacter:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4302
f50a1263f3ce #BUGFIX by stefan
Stefan Vogel <sv@exept.de>
parents: 4297
diff changeset
     1
"{ Encoding: utf8 }"
f50a1263f3ce #BUGFIX by stefan
Stefan Vogel <sv@exept.de>
parents: 4297
diff changeset
     2
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"{ Package: 'stx:libbasic2' }"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
3544
73c88216a4f2 class: HTMLUtilities
sr
parents: 3456
diff changeset
    16
"{ NameSpace: Smalltalk }"
73c88216a4f2 class: HTMLUtilities
sr
parents: 3456
diff changeset
    17
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    18
Object subclass:#HTMLUtilities
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
	instanceVariableNames:''
4517
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    20
	classVariableNames:'EscapeControlCharacters HtmlEntityToCharacter'
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	poolDictionaries:''
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	category:'Net-Communication-Support'
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
copyright
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
 COPYRIGHT (c) 2007 by eXept Software AG
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
              All Rights Reserved
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
 This software is furnished under a license and may be used
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
 only in accordance with the terms of that license and with the
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
 inclusion of the above copyright notice.   This software may not
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
 be provided or otherwise made available to, or used by, any
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
 other person.  No title to or ownership of the software is
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
 hereby transferred.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
documentation
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
    Collected support functions to deal with HTML.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
    Used by HTML generators (DocGenerator), HTMLParsers and the webServer.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
    Therefore, it has been put into libbasic2.
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
"
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    48
2442
db061ff41012 added: #openLauncherOnDisplay:
sr
parents: 2436
diff changeset
    49
!HTMLUtilities class methodsFor:'common actions'!
db061ff41012 added: #openLauncherOnDisplay:
sr
parents: 2436
diff changeset
    50
db061ff41012 added: #openLauncherOnDisplay:
sr
parents: 2436
diff changeset
    51
openLauncherOnDisplay:displayName
    <resource: #obsolete>

    "Obsolete - do not use.
     Emits an obsolete-method warning and then asks NewLauncher to open
     a launcher on the display named displayName.
     If an Error is signalled while doing so, the description of that error
     is answered; otherwise, the receiver is answered."

    self obsoleteMethodWarning.
    [
        NewLauncher openLauncherOnInitializedDisplayNamed:displayName
    ] on:Error do:[:anError |
        "/ non-local return: the error text becomes the answer of this method
        ^ anError description
    ]

    "Modified: / 01-06-2010 / 11:25:12 / sr"
2442
db061ff41012 added: #openLauncherOnDisplay:
sr
parents: 2436
diff changeset
    64
! !
db061ff41012 added: #openLauncherOnDisplay:
sr
parents: 2436
diff changeset
    65
4517
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    66
!HTMLUtilities class methodsFor:'constants'!
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    67
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    68
htmlEntityToCharacter
    "Answer the dictionary which maps HTML entity names (given without the
     leading ampersand and the trailing semicolon) to the character they stand for.
     The dictionary is built on first use and then kept in the class variable
     HtmlEntityToCharacter.
     Only quot, amp, apos, lt, gt, the German umlauts and szlig are known so far."

    |table|

    HtmlEntityToCharacter notNil ifTrue:[
        ^ HtmlEntityToCharacter
    ].

    "/ the table is filled in a local first and stored into the class variable
    "/ only after all entries have been added
    table := Dictionary new.
    table at:'quot'  put:$".
    table at:'amp'   put:$&.
    table at:'apos'  put:$'.
    table at:'lt'    put:$<.
    table at:'gt'    put:$>.
    table at:'Auml'  put:$Ä.
    table at:'Ouml'  put:$Ö.
    table at:'Uuml'  put:$Ü.
    table at:'szlig' put:$ß.
    table at:'auml'  put:$ä.
    table at:'ouml'  put:$ö.
    table at:'uuml'  put:$ü.

    "/ where to get the mapping???
    "/ Answer: It is a mess. A good start may be
    "/ https://www.w3.org/TR/html4/sgml/entities.html with 252 named entities.
    "/ I guess an actual lookup table would be adequate.

    HtmlEntityToCharacter := table.
    ^ HtmlEntityToCharacter

    "Modified (comment): / 27-03-2019 / 10:26:27 / Claus Gittinger"
4517
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    97
! !
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
    98
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
!HTMLUtilities class methodsFor:'helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
3557
21e099fb879e class: HTMLUtilities
sr
parents: 3545
diff changeset
   101
characterFromHtmlEntityNamed:anHtmlEntityName
    "Answer the character denoted by the HTML entity named anHtmlEntityName
     (the name is looked up as key in the table answered by #htmlEntityToCharacter).
     For a name which is not found in that table, the tilde character
     is answered as a placeholder."

    ^ self htmlEntityToCharacter
        at:anHtmlEntityName
        ifAbsent:[
            "/ no breakpoint (halt) here: the table is known to be incomplete,
            "/ so an unknown name simply yields the placeholder character.
            "/
            "/ where to get the mapping???
            "/ Answer: It is a mess. A good start may be
            "/ https://www.w3.org/TR/html4/sgml/entities.html with 252 named entities.
            "/ I guess an actual lookup table would be adequate.
            $~
        ]

    "Modified (format): / 27-03-2019 / 10:25:58 / Claus Gittinger"
3557
21e099fb879e class: HTMLUtilities
sr
parents: 3545
diff changeset
   114
!
21e099fb879e class: HTMLUtilities
sr
parents: 3545
diff changeset
   115
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   116
controlCharacters
    "Answer the dictionary which maps the characters to be escaped in HTML text
     to their replacement strings (less-than, greater-than, ampersand and double quote
     are mapped to the corresponding named entity).
     The dictionary is created on first use and kept in the class variable
     EscapeControlCharacters. The entry for the single quote is commented out."

    EscapeControlCharacters notNil ifTrue:[
        ^ EscapeControlCharacters
    ].

    EscapeControlCharacters := Dictionary new.
    EscapeControlCharacters
        at:$< put:'&lt;';
        at:$> put:'&gt;';
        at:$& put:'&amp;';
        at:$" put:'&quot;'.
        "/ at:$' put:'&apos;'.
    ^ EscapeControlCharacters

    "Modified (comment): / 06-05-2015 / 16:17:31 / sr"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   129
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   130
4517
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
   131
copyReplaceCharactersWithHtmlEntitiesIn:aString
    "Answer a new string built from aString, in which every character for which
     #htmlEntityForCharacter: answers a non-nil entity name is replaced by
     an ampersand, that name and a semicolon.
     All other characters are copied unchanged.
     A nil aString is treated like an empty string."

    |out|

    out := '' writeStream.
    (aString ? '') do:[:ch |
        |entityName|

        entityName := self htmlEntityForCharacter:ch.
        entityName notNil ifTrue:[
            out nextPut:$&.
            out nextPutAll:entityName.
            out nextPut:$;.
        ] ifFalse:[
            out nextPut:ch.
        ].
    ].

    ^ out contents
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
   148
!
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
   149
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   150
escapeCharacterEntities:aString
    "helper to escape invalid/dangerous characters in html strings.
     Answers the result of #escapeCharacterEntities:andControlCharacters:,
     called with aString and the table answered by #controlCharacters
     (which maps less-than, greater-than, ampersand and double quote
      to their named entities)."

    "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    |escapeTable|

    escapeTable := self controlCharacters.
    ^ self escapeCharacterEntities:aString andControlCharacters:escapeTable

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "

    "Modified: / 06-05-2015 / 16:30:13 / sr"
070008476ef8 class: HTMLUtilities
sr
parents: 3544
diff changeset
   170
!
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
3545
070008476ef8 class: HTMLUtilities
sr
parents: 3544
diff changeset
   172
escapeCharacterEntities:aString andControlCharacters:controlCharacters
    "helper to escape invalid/dangerous characters in html strings.
     Answers a new string, which collects whatever
     #escapeCharacterEntities:andControlCharacters:on: writes for aString
     and the given controlCharacters table."

    "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    ^ String streamContents:[:out |
        self
            escapeCharacterEntities:aString
            andControlCharacters:controlCharacters
            on:out
    ]

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "

    "Created: / 06-05-2015 / 16:29:51 / sr"
    "Modified (format): / 05-02-2017 / 17:59:32 / cg"
0da79cbe040b #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4287
diff changeset
   196
!
0da79cbe040b #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4287
diff changeset
   197
0da79cbe040b #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4287
diff changeset
   198
escapeCharacterEntities:aString andControlCharacters:controlCharacters on:aWriteStream
    "helper to escape invalid/dangerous characters in html strings.
     Writes the characters of aString to aWriteStream, where:
        - a character which is found as key in controlCharacters is replaced
          by the string stored there (e.g. '&lt;' for the less-than character)
        - any other character with a codePoint above 16r7F is written as
          decimal numeric character reference (&#NNN;)
        - all remaining characters are written unchanged.
     controlCharacters may be nil or empty; then only the codePoint rule applies.
     A nil aString is treated like an empty string
     (as in #copyReplaceCharactersWithHtmlEntitiesIn:)."

    "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    |useTable controlString|

    "/ the table does not change while looping - check it only once
    useTable := controlCharacters notEmptyOrNil.

    (aString ? '') do:[:c |
        controlString := useTable
                            ifTrue:[controlCharacters at:c ifAbsent:[nil]]
                            ifFalse:[nil].
        controlString notNil ifTrue:[
            aWriteStream nextPutAll:controlString.
        ] ifFalse:[
            c codePoint > 16r7F ifTrue:[
                "/ outside the 7-bit ascii range: emit a numeric character reference
                aWriteStream nextPutAll:'&#'.
                c codePoint printOn:aWriteStream.
                aWriteStream nextPut:$;.
            ] ifFalse:[
                aWriteStream nextPut:c.
            ]
        ]
    ].

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "

    "Created: / 05-02-2017 / 17:58:34 / cg"
    "Modified: / 17-02-2017 / 10:34:20 / stefan"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   236
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   237
4297
0908351381fd #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4296
diff changeset
   238
escapeCharacterEntities:aString on:aStream
    "helper to escape invalid/dangerous characters in html strings.
     Writes aString in escaped form to aStream, by forwarding to
     #escapeCharacterEntities:andControlCharacters:on: with the table
     answered by #controlCharacters (which maps less-than, greater-than,
     ampersand and double quote to their named entities)."

    "/ TODO: this is similar to withSpecialHTMLCharactersEscaped.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    |escapeTable|

    escapeTable := self controlCharacters.
    ^ self
        escapeCharacterEntities:aString
        andControlCharacters:escapeTable
        on:aStream

    "
     self escapeCharacterEntities:'a<b'
     self escapeCharacterEntities:'aöb'
    "

    "Created: / 05-02-2017 / 18:00:56 / cg"
0908351381fd #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4296
diff changeset
   258
!
0908351381fd #REFACTORING by cg
Claus Gittinger <cg@exept.de>
parents: 4296
diff changeset
   259
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   260
extractCharSetEncodingFromContentType:contentTypeLine
    "extract the value of the charset parameter from a content-type line,
     such as 'text/html; charset=utf-8'.

     Returns nil if the line contains no 'charset=' parameter.
     Otherwise returns the parameter's value, which may be an empty string.
     The value ends at the first whitespace or semicolon, whichever comes first;
     enclosing double quotes are removed.
     The parameter name is matched case-insensitively ('Charset=', 'CHARSET=', ...)."

    |idx rest sepIdx semiIdx encoding closingQuoteIdx|

    "/ search in a lowercased copy; the index is valid for the original as well
    idx := contentTypeLine asLowercase findString:'charset='.
    idx == 0 ifTrue:[
        ^ nil
    ].
    rest := (contentTypeLine copyFrom:idx+'charset=' size) withoutSeparators.

    "/ both searches answer 0 for 'not found'; a plain min: would then answer 0
    "/ although the other terminator is present, so only combine real hits
    sepIdx := rest indexOfSeparator.
    semiIdx := rest indexOf:$;.
    sepIdx == 0 ifTrue:[
        idx := semiIdx
    ] ifFalse:[
        semiIdx == 0 ifTrue:[
            idx := sepIdx
        ] ifFalse:[
            idx := sepIdx min:semiIdx
        ]
    ].

    idx == 0 ifTrue:[
        encoding := rest
    ] ifFalse:[
        encoding := rest copyTo:idx-1.
    ].

    (encoding startsWith:$") ifTrue:[
        closingQuoteIdx := encoding indexOf:$" startingAt:2.
        closingQuoteIdx == 0 ifTrue:[
            "/ unterminated quote: take everything after the opening quote
            encoding := encoding copyFrom:2.
        ] ifFalse:[
            encoding := encoding copyFrom:2 to:closingQuoteIdx-1.
        ].
    ].
    ^ encoding.

    "
     self extractCharSetEncodingFromContentType:'text/html; charset=ascii'
     self extractCharSetEncodingFromContentType:'text/html; charset='
     self extractCharSetEncodingFromContentType:'text/html; fooBar=bla'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8; bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; charset=utf-8;bla=fasel'
     self extractCharSetEncodingFromContentType:'text/xml; Charset=UTF-8'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   287
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   288
extractMimeTypeFromContentType:contentTypeLine
    "extract the mime type from a content-type line, such as 'text/html; charset=ascii'.

     If the line contains a colon (as in a full 'Content-Type: ...' header line),
     everything up to and including the first colon is skipped.
     Returns the text before the first semicolon (or the whole remaining text,
     if there is no semicolon), with leading and trailing whitespace removed."

    |colonIdx mimeAndParameters semiIdx|

    "/ indexOf: answers 0 if there is no colon; then the copy starts at the first character
    colonIdx := contentTypeLine indexOf:$:.
    mimeAndParameters := (contentTypeLine copyFrom:colonIdx+1) withoutSeparators.

    semiIdx := mimeAndParameters indexOf:$;.
    semiIdx == 0 ifTrue:[
        ^ mimeAndParameters
    ].

    "/ whitespace is allowed in front of the semicolon ('text/html ; charset=x');
    "/ do not leave it in the answered mime type
    ^ (mimeAndParameters copyTo:semiIdx-1) withoutSeparators

    "
     self extractMimeTypeFromContentType:'text/html; charset=ascii'
     self extractMimeTypeFromContentType:'text/html; '
     self extractMimeTypeFromContentType:'text/html'
     self extractMimeTypeFromContentType:'text/html ; charset=ascii'
     self extractMimeTypeFromContentType:'text/xml; charset=utf-8'
    "
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   308
4517
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
   309
htmlEntityForCharacter:aCharacter
    "answer the key under which aCharacter is stored in the htmlEntityToCharacter
     table, or nil if it is not stored there.
     The space character, letters and digits answer nil right away,
     without the table being consulted."

    (aCharacter == Character space or:[aCharacter isLetterOrDigit]) ifTrue:[
        ^ nil
    ].

    ^ self htmlEntityToCharacter keyAtValue:aCharacter ifAbsent:nil

    "Modified: / 27-03-2019 / 10:24:29 / Claus Gittinger"
!
5c92422a4187 #FEATURE by sr
sr
parents: 4494
diff changeset
   319
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   320
unEscape:aString
    "Convert escaped characters in a url's arguments or post fields back to their proper characters.
     Undoes the effect of #urlEncoded: and #urlEncoded2:.
     The handled sequences are:
        +       -> space
        %XX     character code as two hex digits
        %uXXXX  unicode as four hex digits   NOTE: %u is non-standard, but implemented in MS IIS
        %%      -> %
     A nil argument answers nil; a string containing neither '+' nor '%'
     is answered unchanged (the identical object).
     The percent-decoded codes are written as individual characters;
     no utf8 decoding is done here."

    |in out ch lookahead|

    aString isNil ifTrue:[
        ^ nil.
    ].
    (aString includesAny:'+%') ifFalse:[
        ^ aString
    ].

    in := ReadStream on: aString.
    out := CharacterWriteStream on: ''.

    [in atEnd] whileFalse:[
        ch := in next.

        ch == $+ ifTrue:[
            ch := Character space.
        ] ifFalse:[
            ch == $% ifTrue:[
                lookahead := in peek.
                lookahead isNil ifTrue:[
                    "/ lonely % at the very end
                    ch := in next.
                ] ifFalse:[
                    lookahead isHexDigit ifTrue:[
                        "/ %XX (takes what is available, if fewer than 2 characters remain)
                        ch := (Integer readFrom:(in nextAvailable:2) radix:16) asCharacter.
                    ] ifFalse:[
                        lookahead == $u ifTrue:[
                            "/ %uXXXX: skip the u marker, then read up to 4 hex digits
                            in next.
                            ch := (Integer readFrom:(in nextAvailable:4) radix:16) asCharacter.
                        ] ifFalse:[
                            "/ % followed by anything else stands for that character (%% -> %)
                            ch := in next.
                        ].
                    ].
                ].
            ].
        ].

        ch notNil ifTrue:[
            out nextPut:ch.
        ].
    ].
    ^ out contents

    "
     self unEscape:'a%20b'
     self unEscape:'a%%b'
     self unEscape:'a+b'
     self unEscape:'a%+b'
     self unEscape:'a%'
     self unEscape:'a%2'
     self unEscape:'/Home/a%C3%A4%C3%B6%C3%BCa'
    "

    "Modified: / 09-01-2011 / 10:44:50 / cg"
    "Modified (comment): / 06-05-2015 / 15:40:04 / sr"
    "Modified (comment): / 03-02-2017 / 17:06:32 / stefan"
!
Claus Gittinger <cg@exept.de>
parents: 2500
diff changeset
   390
3545
070008476ef8 class: HTMLUtilities
sr
parents: 3544
diff changeset
   391
unescapeCharacterEntities:aString
    "helper to unescape character entities in a string.
     Normally, this is done by the HTMLParser when it scans text,
     but seems to be also used in post-data fields which contain non-ascii characters
     (for example: the login postdata of expeccALM).

     Sequences are:
        &<specialName>;
        &#<decimal>;
        &#x<hex>;       (also with an uppercase X)

     Anything which starts with & but is not a valid sequence
     (unknown name, invalid number, missing semicolon) is copied unchanged.
     A string without any & is answered unchanged (the identical object).

     From Reference:
        http://wiki.selfhtml.org/wiki/Referenz:HTML/Zeichenreferenz#HTML-eigene_Zeichen
    "

    |rs ws c
     entity entityNumberPart
     decodedCharacter|

    (aString includes:$&) ifFalse:[
        ^ aString
    ].

    rs := ReadStream on:aString.
    ws := CharacterWriteStream on:''.

    [rs atEnd] whileFalse:[
        c := rs next.
        c ~~ $& ifTrue:[
            ws nextPut:c.
        ] ifFalse:[
            "/ collect the candidate up to the terminating semicolon.
            "/ Also stop at the next ampersand: otherwise a stray & (as in 'AT&T &lt; x')
            "/ would swallow a following, valid entity.
            entity := rs upToMatching:[:ch | ch == $; or:[ch == $&]].

            (entity notEmpty and:[rs peek == $;]) ifFalse:[
                "/ '&;', an unterminated candidate or a stray ampersand: copy as is
                ws nextPut:c.
                ws nextPutAll:entity.
            ] ifTrue:[
                rs next. "/ read over semicolon
                decodedCharacter := nil.

                entity first == $# ifTrue:[ "/ entity is given as number
                    entityNumberPart := entity copyFrom:2.
                    entityNumberPart notEmpty ifTrue:[
                        (entityNumberPart first == $x or:[entityNumberPart first == $X]) ifTrue:[
                            entityNumberPart := entityNumberPart copyFrom:2.
                            "/ validate the digits, as the decimal branch does
                            (entityNumberPart notEmpty
                             and:[entityNumberPart conform:[:ch | ch isHexDigit]]) ifTrue:[
                                decodedCharacter := Character value:(Integer readFrom:entityNumberPart radix:16).
                            ].
                        ] ifFalse:[
                            entityNumberPart isNumeric ifTrue:[
                                decodedCharacter := Character value:(Integer readFrom:entityNumberPart).
                            ].
                        ].
                    ].
                ] ifFalse:[
                    decodedCharacter := self characterFromHtmlEntityNamed:entity.
                ].

                decodedCharacter notNil ifTrue:[
                    ws nextPut:decodedCharacter.
                ] ifFalse:[
                    "/ not a valid entity: copy the original text
                    ws nextPut:c.
                    ws nextPutAll:entity.
                    ws nextPut:$;.
                ].
            ].
        ].
    ].

    ^ ws contents

    "
     self unescapeCharacterEntities:'&;'
     self unescapeCharacterEntities:'&16368;'
     self unescapeCharacterEntities:'&16368;&16368'
     self unescapeCharacterEntities:'&16368;&lt;'
     self unescapeCharacterEntities:'&16368;&lt'
     self unescapeCharacterEntities:'&#xaffe;'
     self unescapeCharacterEntities:'&#Xaffe;'
     self unescapeCharacterEntities:'&#xzz;'
     self unescapeCharacterEntities:'AT&T &lt; x'
     self unescapeCharacterEntities:'&quot;&lt;foo'
     self unescapeCharacterEntities:'&funny;&lt;foo'
    "

    "Created: / 06-05-2015 / 16:56:14 / sr"
    "Modified: / 18-05-2015 / 12:13:35 / sr"
    "Modified: / 17-02-2017 / 10:18:35 / stefan"
!
070008476ef8 class: HTMLUtilities
sr
parents: 3544
diff changeset
   489
4712
530912590b7f #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4517
diff changeset
   490
urlDecoded:aString
    "Convert escaped characters in a url's arguments or post fields back to their proper characters.
     Undoes the effect of #urlEncoded: and #urlEncoded2:.
     The argument is first passed through #unEscape:, which handles:
        +       -> space
        %XX     character code as two hex digits
        %uXXXX  unicode as four hex digits   NOTE: %u is non-standard, but implemented in MS IIS
        %%      -> %
     The result of that is then utf8-decoded.
     A nil argument answers nil (like #unEscape: does).

     NOTE(review): a %uXXXX escape yields a non-byte character before #utf8Decoded
     is applied - confirm that #utf8Decoded copes with such input.
    "

    |unescaped|

    unescaped := self unEscape:aString.
    unescaped isNil ifTrue:[
        "/ unEscape: answers nil for nil; do not send utf8Decoded to nil
        ^ nil
    ].
    ^ unescaped utf8Decoded

    "
     self urlDecoded:nil
     self urlDecoded:'a%20b'
     self urlDecoded:'a%%b'
     self urlDecoded:'a+b'
     self urlDecoded:'a%+b'
     self urlDecoded:'a%'
     self urlDecoded:'a%2'
     self urlDecoded:'/Home/a%C3%A4%C3%B6%C3%BCa'
    "

    "Created: / 26-08-2018 / 12:49:24 / Claus Gittinger"
!
530912590b7f #FEATURE by cg
Claus Gittinger <cg@exept.de>
parents: 4517
diff changeset
   513
2522
Claus Gittinger <cg@exept.de>
parents: 2500
diff changeset
   514
urlEncode2:aStringOrStream on:ws
    <resource: #obsolete>
    "helper to escape invalid/dangerous characters in a url's arguments.
     Reads all characters from aStringOrStream and writes the escaped form onto ws.
     Similar to urlEncode, but treats '*','~' and spaces differently.
     (some clients, such as bitTorrent seem to require this - time will tell...)
     Any character not in the set 0-9, a-z, A-Z, '.', '-', '_', is encoded using
     the '%nn' format, where nn is the hexadecimal value of the character's code.
     Characters with a code above 16rFF are written in the non-standard '%unnnn' format.
        see: RFC1738"

    |rs c codePoint|

    rs := aStringOrStream readStream.

    [rs atEnd] whileFalse: [
        c := rs next.
        codePoint := c codePoint.

        "/ isLetterOrDigit alone is also true for non-ascii letters (umlauts etc.),
        "/ which must not be passed through unescaped
        ((codePoint < 16r80 and:[c isLetterOrDigit]) or:[ ('-_.' includes:c) ]) ifTrue:[
            ws nextPut:c.
        ] ifFalse:[
            ws nextPut: $%.
            codePoint > 16rFF ifTrue:[
                ws nextPut: $u.
                codePoint printOn:ws base:16 size:4 fill:$0.
            ] ifFalse:[
                codePoint printOn:ws base:16 size:2 fill:$0.
            ]
        ].
    ].

    "Created: / 09-01-2011 / 10:32:27 / cg"
    "Modified: / 09-01-2011 / 13:11:17 / cg"
    "Modified: / 06-05-2015 / 15:43:39 / sr"
!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   548
2500
Stefan Vogel <sv@exept.de>
parents: 2464
diff changeset
   549
urlEncode:aStringOrStream on:ws
    "helper to escape invalid/dangerous characters in an url's argument or post-fields.
     Reads all characters from aStringOrStream and writes the encoded form onto ws.

     Letters, digits and the characters '-', '_', '.' and '~' are written unchanged.
     A space is written as '+'.
     Any other character up to 16r7F is written as '%nn',
     where nn is its code in hexadecimal (two digits).
     Characters above 16r7F are utf8-encoded first, and each resulting byte
     is then written as '%nn'.
        see: application/x-www-form-urlencoded
        see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)"

    |source ch putEscaped|

    "/ writes a single byte value as a percent sign followed by two hex digits
    putEscaped := [:byteValue |
        ws nextPut:$%.
        byteValue printOn:ws base:16 size:2 fill:$0
    ].

    source := aStringOrStream readStream.
    [ (ch := source nextOrNil) isNil ] whileFalse:[
        (ch isLetterOrDigit or:[ '-_.~' includes:ch ]) ifTrue:[
            ws nextPut:ch
        ] ifFalse:[
            ch == Character space ifTrue:[
                ws nextPut:$+
            ] ifFalse:[
                ch codePoint > 16r7F ifTrue:[
                    "/ non-ascii: escape every byte of the utf8 representation
                    ch utf8Encoded do:[:eachUtf8Char |
                        putEscaped value:eachUtf8Char codePoint
                    ]
                ] ifFalse:[
                    putEscaped value:ch codePoint
                ]
            ]
        ]
    ].

    "
     self urlEncoded:'hokus pokus fidibus*-/~'
     self urlEncoded:'Ützel Brötzel*-/~'
     self urlEncoded:'χαιρε'

     self urlDecoded:(self urlEncoded:'hokus pokus fidibus*-/~')
     self urlDecoded:(self urlEncoded:'Ützel Brötzel*-/~')
     self urlDecoded:(self urlEncoded:'χαιρε')
    "

    "Modified: / 09-01-2011 / 10:43:30 / cg"
    "Modified: / 06-05-2015 / 16:06:52 / sr"
    "Modified (comment): / 07-02-2017 / 14:51:42 / stefan"
    "Modified (comment): / 26-08-2018 / 12:50:04 / Claus Gittinger"
2522
Claus Gittinger <cg@exept.de>
parents: 2500
diff changeset
   600
!
Claus Gittinger <cg@exept.de>
parents: 2500
diff changeset
   601
Claus Gittinger <cg@exept.de>
parents: 2500
diff changeset
   602
urlEncoded2: aString
    <resource: #obsolete>
    "Obsolete variant of urlEncoded: - answers an encoded copy of aString
     for use in an url's arguments or post-fields.
     Similar to urlEncoded, but treats '*', '~' and spaces differently
     (some clients, such as bitTorrent seem to require this - time will tell...).
     The encoding itself is done by urlEncode2:on:.
     NOTE(review): as far as visible in urlEncode2:on:, only letters, digits,
     '-', '_' and '.' are passed through there; everything else is written
     as '%nn' (or '%unnnn' for code points above 16rFF) - confirm against that method.
        see: application/x-www-form-urlencoded
        see: RFC1738"

    |encodedStream|

    encodedStream := String writeStreamWithInitialSize:aString size.
    self urlEncode2:aString on:encodedStream.
    ^ encodedStream contents

    "
      self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')
      self urlEncoded2:'_-.*Frankfurt(Main) Hbf'

      self unEscape:(self urlEncoded:'-_.*%exept;')
      self urlEncoded2:'-_.*%exept;'
      self urlEncoded:'-_.*%exept;'
    "

    "Created: / 09-01-2011 / 10:34:50 / cg"
2500
Stefan Vogel <sv@exept.de>
parents: 2464
diff changeset
   629
!
Stefan Vogel <sv@exept.de>
parents: 2464
diff changeset
   630
Stefan Vogel <sv@exept.de>
parents: 2464
diff changeset
   631
urlEncoded: aString
    "helper to escape invalid/dangerous characters in an url's arguments or post-fields.
     Answers a new string with the encoded contents of aString;
     the encoding itself is done by urlEncode:on:.

     Any byte not in the set 0-9, a-z, A-Z, '.', '-', '_' and '~', is encoded using
     the '%nn' format, where nn is the hexadecimal value of the byte.
     Characters outside the ASCII range are encoded into utf8 first.
     Spaces are encoded as '+'.
        see: application/x-www-form-urlencoded
        see: https://tools.ietf.org/html/rfc3986 (obsoletes RFC1738)"

    |encodedStream|

    "/ preallocate some room for escapes; the stream grows as required
    encodedStream := WriteStream on:(String new:(aString size + 20)).
    self urlEncode:aString on:encodedStream.
    ^ encodedStream contents

    "
      self unEscape:(self urlEncoded:'_-.*Frankfurt(Main) Hbf')
      self urlEncoded:'_-.*Frankfurt(Main) Hbf'

      self unEscape:(self urlEncoded:'-_.*%exept;')
      self urlEncoded:'-_.*%exept;'
    "

    "Modified: / 09-01-2011 / 10:43:37 / cg"
    "Modified: / 07-02-2017 / 14:54:12 / stefan"
2066
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   658
!
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   659
2436
a5537ae7be4a added: #withAllSpecialHTMLCharactersEscaped:
Claus Gittinger <cg@exept.de>
parents: 2434
diff changeset
   660
withAllSpecialHTMLCharactersEscaped:aStringOrCharacter
    "replace ampersand, less, greater and quotes by html-character escapes.
     The argument may be a string or a single character.
     A string which contains none of those characters is answered as is (no copy);
     otherwise a new string is answered."

    "/ TODO: this is similar to escapeCharacterEntities.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    |escaped|

    "/ fast path: nothing to escape
    aStringOrCharacter isString ifTrue:[
        (aStringOrCharacter includesAny:'&<>''"') ifFalse:[^ aStringOrCharacter].
    ].

    escaped := CharacterWriteStream on:''.
    aStringOrCharacter asString do:[:ch |
        |entity|

        "/ a hand-written switch, because this method is used heavily;
        "/ answers the replacement entity, or nil if ch needs no escaping
        entity :=
            ch == $& ifTrue:[ '&amp;' ] ifFalse:[
            ch == $< ifTrue:[ '&lt;' ] ifFalse:[
            ch == $> ifTrue:[ '&gt;' ] ifFalse:[
            ch == $" ifTrue:[ '&quot;' ] ifFalse:[
            ch == $' ifTrue:[ '&apos;' ] ifFalse:[ nil ]]]]].

        entity isNil
            ifTrue:[ escaped nextPut:ch ]
            ifFalse:[ escaped nextPutAll:entity ].
    ].
    ^ escaped contents

    "
     self withAllSpecialHTMLCharactersEscaped:'<>#&'
     self withAllSpecialHTMLCharactersEscaped:$<
     self withAllSpecialHTMLCharactersEscaped:$#
    "

    "Modified: / 05-12-2006 / 13:48:59 / cg"
    "Modified: / 06-05-2015 / 15:41:06 / sr"
2436
a5537ae7be4a added: #withAllSpecialHTMLCharactersEscaped:
Claus Gittinger <cg@exept.de>
parents: 2434
diff changeset
   707
!
a5537ae7be4a added: #withAllSpecialHTMLCharactersEscaped:
Claus Gittinger <cg@exept.de>
parents: 2434
diff changeset
   708
2066
0ee2ef2d018c more common code
Claus Gittinger <cg@exept.de>
parents: 2058
diff changeset
   709
withSpecialHTMLCharactersEscaped:aStringOrCharacter
    "replace ampersand, less and greater by html-character escapes.
     The argument may be a string or a single character.
     A non-wide string which contains none of those characters is answered as is (no copy);
     in all other cases a new string is answered.
     Quotes are not escaped here."

    "/ TODO: this is similar to escapeCharacterEntities.
    "/ we should refactor this into one method only (can we do hex escapes always ?).
    "/ Notice, that these two methods came into existence due to historic reasons
    "/ and were developed independent of each other, but later moved to this common place.

    |escaped|

    "/ fast path: nothing to escape (only taken for non-wide strings)
    (aStringOrCharacter isString and:[ aStringOrCharacter isWideString not ]) ifTrue:[
        (aStringOrCharacter includesAny:'&<>') ifFalse:[^ aStringOrCharacter].
    ].

    escaped := CharacterWriteStream on:''.
    aStringOrCharacter asString do:[:ch |
        |entity|

        "/ a hand-written switch, because this method is used heavily;
        "/ answers the replacement entity, or nil if ch needs no escaping
        entity :=
            ch == $& ifTrue:[ '&amp;' ] ifFalse:[
            ch == $< ifTrue:[ '&lt;' ] ifFalse:[
            ch == $> ifTrue:[ '&gt;' ] ifFalse:[ nil ]]].

        "/ characters above 16r7F are written unchanged
        "/ (an earlier numeric-entity escaping for them has been disabled)
        entity isNil
            ifTrue:[ escaped nextPut:ch ]
            ifFalse:[ escaped nextPutAll:entity ].
    ].
    ^ escaped contents

    "
     self withSpecialHTMLCharactersEscaped:'<>#&'
     self withSpecialHTMLCharactersEscaped:$<
     self withSpecialHTMLCharactersEscaped:$#
    "

    "Modified: / 13-04-2011 / 23:13:32 / cg"
    "Modified: / 06-05-2015 / 15:41:16 / sr"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   759
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   760
3647
738252558e04 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3640
diff changeset
   761
!HTMLUtilities class methodsFor:'queries'!
738252558e04 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3640
diff changeset
   762
738252558e04 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3640
diff changeset
   763
isUtilityClass
    "answer true if the receiver is HTMLUtilities itself;
     any other receiver (i.e. a subclass) answers false"

    ^ HTMLUtilities == self
738252558e04 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3640
diff changeset
   765
! !
738252558e04 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3640
diff changeset
   766
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   767
!HTMLUtilities class methodsFor:'serving-helpers'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   768
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   769
escape:aString
    "helper to escape invalid/dangerous characters in an url's arguments or post-fields.
     Answers a new string, in which the following are replaced:
        control characters, dQuote, '+', ';', '?', '&' and space -> %XX ascii as hex digits
        %     -> %%
     DEL (16r7F) and all characters outside the ascii range are also written
     as a percent sign followed by their code point in hex
     (padded to 2 digits up to 16rFF, padded to 4 digits above).
     All other (printable ascii) characters are copied unchanged.

     NOTE(review): code points above 16rFF are written as %XXXX without a marker;
     confirm that the decoding side (unEscape:) expects this format."

    |in out ch code|

    in := ReadStream on:aString.
    out := WriteStream on:''.
    [ in atEnd ] whileFalse:[
        ch := in next.
        ch == $% ifTrue:[
            out nextPutAll:'%%'
        ] ifFalse:[
            code := ch codePoint.
            "/ only printable ascii (16r21 .. 16r7E) which is not reserved may pass;
            "/ the lower bound also catches the control characters (below 16r20),
            "/ which were formerly copied unescaped, in contrast to the method comment.
            ((code between:16r21 and:16r7E)
             and:[ ('+;?&" ' includes:ch) not ]) ifTrue:[
                out nextPut:ch
            ] ifFalse:[
                out nextPut:$%.
                code printOn:out base:16 size:(code > 16rFF ifTrue:[4] ifFalse:[2]) fill:$0
            ]
        ]
    ].
    ^ out contents

    "
     self escape:'a b'
     self escape:'a%b'
     self escape:'a+b'
     self escape:'aäüöb'
     self escape:(String with:$a with:Character tab with:$b)
    "

    "Modified: / 06-05-2015 / 16:07:18 / sr"
    "Modified: / 25-11-2016 / 16:37:53 / cg"
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   806
! !
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   807
2144
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   808
!HTMLUtilities class methodsFor:'text processing helpers'!
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   809
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   810
plainTextOfHTML:htmlString
    "Answer the raw text contained in htmlString, with all markup dropped.
     The string is parsed with an HTMLParser; for every text element of the
     resulting document, its text is passed through #withoutSeparators
     (presumably trimming surrounding whitespace - confirm in String) and,
     if anything remains, appended to the result. Successive pieces are
     separated by exactly one space.
     Useful for searching for strings inside some html text."

    |document out first|

    document := HTMLParser new parseText:htmlString.
    out := CharacterWriteStream on:(String new:100).

    "/ first stays true until the first non-empty piece has been written;
    "/ it suppresses the separating space in front of that piece.
    first := true.

    document markUpElementsDo:[:element |
        |piece|

        "/ non-text elements are ignored; however, text in info-titles
        "/ or scripts could be cared for here as well...
        element isTextElement ifTrue:[
            piece := element text withoutSeparators.
            piece isEmpty ifFalse:[
                first ifFalse:[ out space ].
                out nextPutAll:piece.
                first := false
            ].
        ].
    ].
    ^ out contents

    "
     self plainTextOfHTML:'
            bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6'
     self plainTextOfHTML:'Hello World'
    "

    "Modified: / 06-05-2015 / 17:02:36 / sr"
2144
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   846
! !
c89258333f4d *** empty log message ***
Claus Gittinger <cg@exept.de>
parents: 2087
diff changeset
   847
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   848
!HTMLUtilities class methodsFor:'documentation'!
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   849
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   850
version
    "Answer the version identification string of this class.
     The literal is a Header keyword placeholder; presumably it gets
     expanded by the source code management system - confirm there."

    ^ '$Header$'
2434
5625df4b6119 comment/format in: #escapeCharacterEntities:
Claus Gittinger <cg@exept.de>
parents: 2179
diff changeset
   852
!
5625df4b6119 comment/format in: #escapeCharacterEntities:
Claus Gittinger <cg@exept.de>
parents: 2179
diff changeset
   853
5625df4b6119 comment/format in: #escapeCharacterEntities:
Claus Gittinger <cg@exept.de>
parents: 2179
diff changeset
   854
version_CVS
    "Answer the CVS flavour of the version identification string of this class.
     The literal is a Header keyword placeholder; presumably it gets
     expanded by the source code management system - confirm there."

    ^ '$Header$'
2058
f407ff58f780 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   856
! !
3098
2ae8f1b57bc1 class: HTMLUtilities
sr
parents: 2866
diff changeset
   857