RegressionTests__HTMLParserTests.st
author Claus Gittinger <cg@exept.de>
Wed, 01 May 2019 22:49:30 +0200
changeset 2220 7cfba4b4d279
parent 2202 145d31a73d22
child 2240 5a1495fa22bb
permissions -rw-r--r--
initial checkin class: RegressionTests::ValueHolderTests added: #test01_BlockValue class: RegressionTests::ValueHolderTests class

"{ Encoding: utf8 }"

"{ Package: 'stx:goodies/regression' }"

"{ NameSpace: RegressionTests }"

"Defines the test case class HTMLParserTests as a subclass of TestCase
 (namespace RegressionTests, category tests-Regression-XML).
 The class declares no instance variables, class variables or pool dictionaries."
TestCase subclass:#HTMLParserTests
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'tests-Regression-XML'
!

!HTMLParserTests class methodsFor:'documentation'!

documentation
"
    Regression tests for HTML parsing.

    The tests feed HTML text to HTML::HTMLParser (called the new parser
    in the test comments) and to HTMLParser (called the old parser) and
    check the first element of the result, the doctype and the text
    extracted from elements.
    Before each test, setUp makes sure that the package
    stx:goodies/webServer/htmlTree is loaded.

    [author:]
	mb (mb@SUNGSAM)

    [instance variables:]
	none

    [class variables:]
	none

    [see also:]
	HTML::HTMLParser
	HTMLParser
	HTML::TextExtractor

"
!

history
    "Comment-only method which records when and by whom this class was created.
     The stamp below reads: date (dd-mm-yyyy) / time / author."

    "Created: / 15-01-2009 / 12:44:33 / mb"
! !

!HTMLParserTests methodsFor:'initialize / release'!

setUp
    "common setup - invoked before testing.
     Runs the inherited setup first (mirroring tearDown, which invokes the
     inherited cleanup), then makes sure that the package
     stx:goodies/webServer/htmlTree is present:
     if the class stx_goodies_webServer_htmlTree (presumably the project
     definition class of that package) is not known to the system,
     the package is loaded; an error is raised if loading fails.
     Finally, #load is sent to that class."

    super setUp.

    (Smalltalk classNamed:'stx_goodies_webServer_htmlTree') isNil ifTrue:[
        (Smalltalk loadPackage:'stx:goodies/webServer/htmlTree') ifFalse:[
            self error:'stx:goodies/webServer/htmlTree cannot be loaded'.
        ].
    ].
    "/ the class is looked up again, because it may have just been loaded above
    (Smalltalk classNamed:'stx_goodies_webServer_htmlTree') load.

    "Modified: / 31-07-2017 / 11:40:25 / mawalch"
!

tearDown
    "common cleanup - invoked after testing.
     Nothing of its own to release; only the inherited cleanup is performed."

    super tearDown.
! !

!HTMLParserTests methodsFor:'tests'!

test01a
    "test the new parser (HTML::HTMLParser):
     parse a fragment which consists of an empty HEAD element only.
     There is no assertion here; the parse result is merely kept in a local."

    |parsed|

    parsed := HTML::HTMLParser
                parseText:'
<HEAD>
</HEAD>
'.
    "/ parsed inspect.

    "
     self new test01a
    "

    "Created: / 29-03-2019 / 10:35:20 / Claus Gittinger"
!

test01b
    "test the old parser (HTMLParser):
     parse a fragment which consists of an empty HEAD element only.
     There is no assertion here; the parse result is merely kept in a local."

    |parsed|

    parsed := HTMLParser
                parseText:'
<HEAD>
</HEAD>
'.
    "/ parsed inspect.

    "
     self new test01b
    "

    "Created: / 29-03-2019 / 10:35:27 / Claus Gittinger"
!

test01c
    "test the old parser (HTMLParser):
     parse an empty HEAD element which is nested inside an HTML element.
     There is no assertion here; the parse result is merely kept in a local."

    |parsed|

    parsed := HTMLParser
                parseText:'
<HTML>
<HEAD>
</HEAD>
</HTML>
'.

    "
     self new test01c
    "

    "Created: / 29-03-2019 / 11:22:27 / Claus Gittinger"
!

test02a
    "test the new parser (HTML::HTMLParser):
     parse a complete page (leading comment, head with meta and title,
     body with a table) and check that the first child of the parse result
     is the head element."

    |page firstChild|

    page := HTML::HTMLParser parseText:'
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    firstChild := page children first.
    self assert:(firstChild tagName = 'head').

    "
     self new test02a
    "

    "Created: / 29-03-2019 / 10:35:45 / Claus Gittinger"
!

test02b
    "test the old parser (HTMLParser):
     parse a complete page (leading comment, head with meta and title,
     body with a table) and check that the first markup of the result
     - or its successor, if the first one is a text element -
     is the html element."

    |page element|

    page := HTMLParser parseText:'
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    element := page markup.
    "/ step over a leading text element, if there is one
    element := element isTextElement
                ifTrue:[element next]
                ifFalse:[element].
    self assert:(element tagName = 'html').

    "
     self new test02b
    "

    "Created: / 29-03-2019 / 10:35:55 / Claus Gittinger"
    "Modified: / 29-03-2019 / 11:54:05 / Claus Gittinger"
!

test03
    "test the new parser (HTML::HTMLParser) on a page which starts with a
     DOCTYPE declaration followed by a comment:
     the first child of the parse result must be the head element,
     and the public identifier of the DOCTYPE must be answered by docType."

    |page firstChild|

    page := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    firstChild := page children first.
    self assert:(firstChild tagName = 'head').
    self assert:(page docType = '-//W3C//DTD HTML 4.01 Transitional//EN').

    "
     self new test03
    "
!

test04_style
    "verify: no ampersand escaping in style elements.
     A page whose head contains a style element with several ampersand
     sequences is parsed (with UserNotifications ignored during the parse);
     the text extracted from the style element is then compared against
     the expected string."

    |page head style extracted|

    UserNotification ignoreIn:[
        page := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<style>foo bar &bla &froboz &amp; &amp foo</style>
</head>
<body>
</body>
</html>
'.
    ].

    "/ walk down: result -> head -> style
    head := page children first.
    self assert:(head tagName = 'head').

    style := head children first.
    self assert:(style tagName = 'style').

    extracted := HTML::TextExtractor extractTextFromElement:style.
    self assert:(extracted = 'foo bar &bla &froboz & &amp foo').

    "
     self new test04_style
    "

    "Created: / 27-06-2018 / 12:58:48 / Claus Gittinger"
    "Modified: / 16-07-2018 / 19:49:23 / Claus Gittinger"
!

test05_textExtraction
    "verify: text extraction from a paragraph.
     A page whose body holds a single paragraph with several ampersand
     sequences is parsed (with UserNotifications ignored during the parse);
     the extractedText of the first child of the body is then compared
     against the expected string."

    |page body paragraph|

    UserNotification ignoreIn:[
        page := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body>
<p>
foo bar &bla &froboz &amp; &amp foo
</p>
</body>
</html>
'.
    ].

    body := page body.
    paragraph := body children first.
    self assert:(paragraph extractedText = 'foo bar &bla &froboz & &amp foo').

    "
     self new test05_textExtraction
    "

    "Created: / 27-06-2018 / 15:28:31 / Claus Gittinger"
!

test06_comments
    "verify: comments do not show up in the extracted text.
     A page whose body mixes plain text, comments (adjacent ones and ones on
     lines of their own) and a paragraph is parsed (with UserNotifications
     ignored during the parse); the extractedText of the body is then
     compared against the expected string."

    |page body|

    UserNotification ignoreIn:[
        page := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body>
foo<!!--[foo]--><!!--bla-->bar
<!!--[foo]--><!!--bla-->
foo
<!!--[foo]--><!!--bla-->
bar
<!!--[foo]--><!!--bla-->
<p>
foo bar &bla &froboz &amp; &amp foo
</p>
</body>
</html>
'.
    ].

    body := page body.
    self assert:(body extractedText = 'foo bar foo bar foo bar &bla &froboz & &amp foo').

    "
     self new test06_comments
    "

    "Created: / 16-07-2018 / 19:43:12 / Claus Gittinger"
! !

!HTMLParserTests class methodsFor:'documentation'!

version
    "Answers the version string of this class.
     The literal is a keyword placeholder; presumably it is expanded by the
     source code management system - to be confirmed."

    ^ '$Header$'
!

version_CVS
    "Answers the CVS flavour of the version string of this class;
     it currently holds the same unexpanded keyword placeholder as #version."

    ^ '$Header$'
! !