RegressionTests__HTMLParserTests.st
author Claus Gittinger <cg@exept.de>
Tue, 09 Jul 2019 18:53:03 +0200
changeset 2327 bf482d49aeaf
parent 2240 5a1495fa22bb
child 2341 91b141d06afa
permissions -rw-r--r--
#QUALITY by exept class: RegressionTests::StringTests added: #test82c_expanding

"{ Encoding: utf8 }"

"{ Package: 'stx:goodies/regression' }"

"{ NameSpace: RegressionTests }"

"Test case class holding the HTML parser regression tests;
 it needs no instance or class variables of its own."
TestCase subclass:#HTMLParserTests
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'tests-Regression-XML'
!

!HTMLParserTests class methodsFor:'documentation'!

documentation
"
    Regression tests for HTML parsing.

    The tests feed HTML text to two parser classes:
        HTMLParser          - called the old parser in the test comments
        HTML::HTMLParser    - called the new parser in the test comments
    and check the result (first element, docType, extracted text).
    Some tests only check that parsing does not raise an error.

    setUp loads the package stx:goodies/webServer/htmlTree
    if its project definition class is not yet present.

    [author:]
	mb (mb@SUNGSAM)

    [instance variables:]
        none

    [class variables:]
        none

    [see also:]
        HTMLParser
        HTML::HTMLParser
        HTML::TextExtractor

"
!

history
    "records when and by whom this class was created"

    "Created: / 15-01-2009 / 12:44:33 / mb"
! !

!HTMLParserTests methodsFor:'initialize / release'!

setUp
    "common setup - invoked before testing.
     Performs the inherited setup first (symmetric to tearDown),
     then makes sure that the package stx:goodies/webServer/htmlTree is available:
     if its project definition class is not known, the package is loaded,
     and an error is raised if loading reports a failure.
     Finally the project definition class is asked to load."

    |projectDefinition|

    super setUp.

    projectDefinition := Smalltalk classNamed:'stx_goodies_webServer_htmlTree'.
    projectDefinition isNil ifTrue:[
        (Smalltalk loadPackage:'stx:goodies/webServer/htmlTree') ifFalse:[
            self error:'stx:goodies/webServer/htmlTree cannot be loaded'.
        ].
        "/ the class is expected to exist now - fetch it again
        projectDefinition := Smalltalk classNamed:'stx_goodies_webServer_htmlTree'.
    ].
    projectDefinition load.

    "Modified: / 31-07-2017 / 11:40:25 / mawalch"
!

tearDown
    "common cleanup - invoked after testing.
     This class has nothing of its own to release;
     only the inherited cleanup is performed."

    super tearDown.
! !

!HTMLParserTests methodsFor:'tests'!

test01a
    "test the new parser:
     a fragment consisting only of an empty HEAD element
     must be parsed without an error and must yield a result object"

    |el|

    el := HTML::HTMLParser parseText:'
<HEAD>
</HEAD>
'.
    "/ el inspect.

    "/ without this check, the test could only fail by an error raised in the parser
    self assert:el notNil.

    "
     self new test01a
    "

    "Created: / 29-03-2019 / 10:35:20 / Claus Gittinger"
!

test01b
    "test the old parser:
     a fragment consisting only of an empty HEAD element
     must be parsed without an error and must yield a result object"

    |el|

    el := HTMLParser parseText:'
<HEAD>
</HEAD>
'.
    "/ el inspect.

    "/ without this check, the test could only fail by an error raised in the parser
    self assert:el notNil.

    "
     self new test01b
    "

    "Created: / 29-03-2019 / 10:35:27 / Claus Gittinger"
!

test01c
    "test the old parser:
     like test01b, but the empty HEAD element is wrapped in an HTML element;
     parsing must not raise an error and must yield a result object"

    |el|

    el := HTMLParser parseText:'
<HTML>
<HEAD>
</HEAD>
</HTML>
'.

    "/ without this check, the test could only fail by an error raised in the parser
    self assert:el notNil.

    "
     self new test01c
    "

    "Created: / 29-03-2019 / 11:22:27 / Claus Gittinger"
!

test02a
    "test the new parser:
     parse a complete page which starts with a comment,
     and check that the first child of the result is the head element"

    |document firstChild|

    document := HTML::HTMLParser parseText:'
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    firstChild := document children first.
    self assert:firstChild tagName = 'head'.

    "
     self new test02a
    "

    "Created: / 29-03-2019 / 10:35:45 / Claus Gittinger"
!

test02b
    "test the old parser:
     parse a complete page which starts with a comment,
     and check that the first non-text markup element is the html element"

    |document leadingMarkup firstTag|

    document := HTMLParser parseText:'
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    leadingMarkup := document markup.
    "/ the markup may start with a text element; in that case look at its successor
    firstTag := leadingMarkup isTextElement
                    ifTrue:[leadingMarkup next]
                    ifFalse:[leadingMarkup].
    self assert:firstTag tagName = 'html'.

    "
     self new test02b
    "

    "Created: / 29-03-2019 / 10:35:55 / Claus Gittinger"
    "Modified: / 29-03-2019 / 11:54:05 / Claus Gittinger"
!

test03
    "test the new parser:
     parse a complete page which starts with a DOCTYPE declaration and a comment;
     check that the first child of the result is the head element,
     and that the public identifier of the DOCTYPE is answered as docType"

    |document|

    document := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!!--
Copyright 2004 ThoughtWorks, Inc

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
  <meta content="text/html; charset=ISO-8859-1"
 http-equiv="content-type">
  <title>Test Open</title>
</head>
<body>
<table cellpadding="1" cellspacing="1" border="1">
  <tbody>
    <tr>
      <td rowspan="1" colspan="3">Google Test Search<br>
      </td>
    </tr>
    <tr>
      <td>open</td>
      <td>http://www.google.com/webhp?hl=en</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Google</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>type</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>verifyValue</td>
      <td>q</td>
      <td>Selenium OpenQA</td>
    </tr>
    <tr>
      <td>clickAndWait</td>
      <td>btnG</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTextPresent</td>
      <td>openqa.org</td>
      <td>&nbsp;</td>
    </tr>
    <tr>
      <td>verifyTitle</td>
      <td>Selenium OpenQA - Google Search</td>
      <td>&nbsp;</td>
    </tr>
  </tbody>

</table>
</body>
</html>
'.
    self assert:document children first tagName = 'head'.
    self assert:document docType = '-//W3C//DTD HTML 4.01 Transitional//EN'.

    "
     self new test03
    "
!

test04_style
    "verify: no ampersand escaping in style elements.
     The text extracted from the style element is compared against
     the string expected by the assertion at the end."

    |document head style extracted|

    "/ UserNotifications raised while parsing are ignored
    UserNotification ignoreIn:[
        document := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<style>foo bar &bla &froboz &amp; &amp foo</style>
</head>
<body>
</body>
</html>
'.
    ].

    head := document children first.
    self assert:head tagName = 'head'.

    style := head children first.
    self assert:style tagName = 'style'.

    extracted := HTML::TextExtractor extractTextFromElement:style.
    self assert:extracted = 'foo bar &bla &froboz & &amp foo'.

    "
     self new test04_style
    "

    "Created: / 27-06-2018 / 12:58:48 / Claus Gittinger"
    "Modified: / 16-07-2018 / 19:49:23 / Claus Gittinger"
!

test05_textExtraction
    "verify: text extraction from a paragraph element.
     As the expected string shows, the extracted text carries no leading or
     trailing line break, the entity written as ampersand-amp-semicolon comes
     back as a single ampersand, and the other ampersand sequences are unchanged."

    |document paragraph|

    "/ UserNotifications raised while parsing are ignored
    UserNotification ignoreIn:[
        document := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body>
<p>
foo bar &bla &froboz &amp; &amp foo
</p>
</body>
</html>
'.
    ].

    paragraph := document body children first.
    self assert:paragraph extractedText = 'foo bar &bla &froboz & &amp foo'.

    "
     self new test05_textExtraction
    "

    "Created: / 27-06-2018 / 15:28:31 / Claus Gittinger"
!

test06_comments
    "verify: HTML comments do not show up in the text extracted from the body.
     As the expected string shows, the words around the comments come back
     separated by single spaces."

    |document body|

    "/ UserNotifications raised while parsing are ignored
    UserNotification ignoreIn:[
        document := HTML::HTMLParser parseText:'
<!!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body>
foo<!!--[foo]--><!!--bla-->bar
<!!--[foo]--><!!--bla-->
foo
<!!--[foo]--><!!--bla-->
bar
<!!--[foo]--><!!--bla-->
<p>
foo bar &bla &froboz &amp; &amp foo
</p>
</body>
</html>
'.
    ].

    body := document body.
    self assert:body extractedText = 'foo bar foo bar foo bar &bla &froboz & &amp foo'.

    "
     self new test06_comments
    "

    "Created: / 16-07-2018 / 19:43:12 / Claus Gittinger"
!

test07_badAttributeInEbayPage
    "verify: the new parser copes with an invalid attribute
     (a lone double quote used as attribute name) as found in a real-world page.
     Parsing must not raise an error and must yield a result object."

    |doc|

    "/ when getting ebay's innerhtml, we get the following from firefox
    "/ (which is invalid in the '<a role="button" _sp="p2047675.l1473" ... line)

    "/ UserNotifications raised while parsing are ignored
    UserNotification ignoreIn:[
        doc := HTML::HTMLParser parseText:'
<div class="c-std vi-ds3cont-box-marpad " expeccoid="5854a792-55e4-4e30-80b9-6f9256863e60">
                <div class="actPanel  vi-noborder " expeccoid="07a6de1e-afe0-4f97-a6fc-5da0c713245a">   
                <table class="vi-bboxrev-tbl" expeccoid="97f87a2d-df4f-4036-a0cf-1457fddc6423" width="100%">
                        <tbody expeccoid="c34fba5f-0727-4f8b-9f48-5f75c5c46025"><tr expeccoid="78e69257-8792-447e-a383-8860ed5a9181">
                                <td expeccoid="9e5f8b64-7655-4e57-91bf-8651956fa738">&nbsp;</td>
                                <td class="vi-bboxrev-cntrcell" expeccoid="d20b7e71-f388-4d95-a28d-63fddbf7b738">
        <div class="u-cb spcr  vi-bbox-spcr15 " expeccoid="694c0a74-4e12-44fc-b581-5317b823ab2d"></div>

                                <div class="u-cb" expeccoid="e2f09742-6068-4ba9-bec3-a3ae9d77a470">
                                        <div class="vi-bbox-dspn u-flL lable binLable" id="prcIsum-lbl" expeccoid="e394142f-894f-479f-a7cd-c08915cbb0c6">Preis:</div>
                                                <div id="vi-mskumap-none" style="" class="u-flL w29 vi-price " expeccoid="e3b55a6d-8605-438f-ab60-70701e7581ce">

        <span class="notranslate" id="prcIsum" itemprop="price" style="" content="399.0" expeccoid="7a6c8689-32f5-4f50-a088-ff1be081d2f5">EUR 399,00</span>
                <span itemprop="availability" content="https://schema.org/InStock" expeccoid="3137ad87-1264-444b-9b85-98ef724ab76c"></span>
        <span itemprop="priceCurrency" content="EUR" expeccoid="350f8bf7-1514-4665-a94a-bf9fe97c1a05"></span>
        <!!--Added for VAT message - DE site. Show VAT included msg just below the price. Converted price message should come after this message.-->
        <div class="vat" expeccoid="b21b28fe-b3b7-4def-963c-24a014b18adb">(inkl. MwSt.)</div>
        <!!-- Vat Excluded message -->
        </div>
<div id="isum-shipCostDiv" class=" sh-CostBB" style="clear: both; display: none;" expeccoid="71265b78-caf3-421b-b583-4f07b78c1492">Kostenloser Versand</div>
                                                <span class="pdT8 " id="vi-bboxrev-othrprices" expeccoid="ad58ac0f-d6ca-4491-82a9-820c5080d953">
                                                        </span>

                                                <!!-- inserting code for another button -->
                                                <!!-- code ends  -->
                                                <div class="u-flL" expeccoid="32007685-f9ae-4e3b-a965-e15e349ea40d">
                                                        <a role="button" _sp="p2047675.l1356" id="binBtn_btn" style="" class="btn btn-prim btn-m vi-VR-btnWdth-L " href="https://offer.ebay.de/ws/eBayISAPI.dll?BinConfirm&amp;rev=318&amp;fromPage=2047675&amp;item=372177206742&amp;fb=1" vib="vib" rel="nofollow" "="" expeccoid="48d25feb-7255-4db6-ac71-0236037cde12">
                                                        Sofort-Kaufen<span class="clipped" expeccoid="c4033cd5-0b5a-4cdf-a84c-f25e0f69ba40"> - </span>
                                                        </a>
                                                <div class="oly_upnl" id="streamLineBinOly" expeccoid="eb5c3f3f-7d04-4c96-82cd-fa93a23e2ca2"><div id="streamline-bin-layer" expeccoid="af596b1f-d458-4a43-8301-7d5047c2f242">
            <div id="streamline-bin-layer-content" expeccoid="b384cce2-5e1b-48a5-a8a7-ad4f7dc95ee1">
                <div id="sbin-image-content" expeccoid="1be9cd52-8c2d-4132-8ac1-49b7b3fbcdad">
                    <img id="sbin-image" src="" alt="Sessel-Loungesessel-Clubsessel-Stoff-blau-Sitzmoebel-Wohnmoebel-Frenco" expeccoid="da17cef8-be1f-43e4-90c4-fca6dfcec4f9">
                </div>
                <div id="sbin-text-content" expeccoid="832f96ee-e300-48e7-97c7-29b892330546">
                    <p class="sbin-title" expeccoid="2132b498-9bee-4b28-a378-88d73eaf8a36">Sessel Loungesessel Clubsessel Stoff blau Sitzmöbel Wohnmöbel Frenco</p>
                </div>
            </div>

            <div style="clear:both" expeccoid="a3e3867c-b660-40c3-8645-bbfc7e177d68"></div>

            <div id="sbin-buttons" expeccoid="0dab8f09-e50d-40a6-81bf-fe2117ba1b52">
                <button id="sbin-signin-btn" type="button" expeccoid="94dc2fb8-5949-478f-aecf-d08d2357f924">Einloggen und zur Kasse gehen</button>
                <button id="sbin-gxo-btn" type="button" expeccoid="f83fb1bc-8028-4e0f-b77d-331d9dbe917d">Als Gast kaufen</button>
            </div>
        </div>

    </div>
        </div>
                                        </div>  

                                <div class="u-cb spcr vi-bbox-spcr10" expeccoid="c5773c82-b2e5-4406-b9cf-9f1f2ee07452"></div>
                <div class="u-cb  " expeccoid="e66441e7-b1f7-4342-a97f-587b9dd314ef">
                                <div class="vi-bbox-dspn u-flL lable" expeccoid="4916b066-5ca8-4510-acad-f4ee7a5017f8">&nbsp;</div>
                                <div class="vi-bbox-dspn u-flL w29" expeccoid="4887b323-d297-4eeb-936a-e4f4226a9899">&nbsp;</div>
                                <span expeccoid="0cf4f8df-5d53-4697-b7b5-cf0b9efbaeee">
                                                        <a role="button" _sp="p2047675.l1473" id="isCartBtn_btn" style="" class="btn btn-scnd btn-m vi-VR-btnWdth-L " href="https://cart.payments.ebay.de/sc/add?item=iid:372177206742,qty:1&amp;srt=01000400000050c9e6b4b7e8a98fa345bada6aa05c2af7b022564fa4af69bb2b1facef665ef094d1ceda6cf587b4f354495e3fde21224874629196b80aaf7e986322a17673f7dea6b072ca12d80bf48c979adbb385a72e&amp;ssPageName=CART:ATC" vib="vib" rel="nofollow" "="" expeccoid="5c24b1ee-b193-4794-890a-312def923271">
                                                        In den Warenkorb<span class="clipped" expeccoid="e4f6e129-8b33-4734-b576-f374c1ee366f"> - </span>
                                                        </a>
                                                </span>
                                                </div>
                        <div class="u-cb spcr" expeccoid="687b199a-e6b3-4e79-8312-92285ad6fd54"></div>
                        </td>
                        <td expeccoid="c7d638f3-694c-492d-a1d5-460480cec889">&nbsp;</td>
                        </tr>
                        </tbody></table>
                </div>

        <div class="watchListCmp  vi-noborder " expeccoid="07cc69f9-d1da-4716-b789-ea1b1fe6ba6f">
<table class="vi-bboxrev-tbl" expeccoid="961e3260-68d4-4b92-ae1b-3e1265f881fc" width="100%">
                <tbody expeccoid="5fcb3674-b815-4c85-a480-f255702b01c4"><tr expeccoid="6418343d-da8c-4b9f-bf25-cbcc86060108">
                        <td expeccoid="9f2682dc-ee1c-46b7-a086-88346fc64658">&nbsp;</td>
                        <td class="vi-bboxrev-cntrcell" expeccoid="f3084bc9-699e-4307-9242-397b6a7aef64">
<div class=" " expeccoid="4d906144-37d4-4696-a1bd-e40874b36e5b">
                                        <div id="vi-atl-lnk" class="vi-atw-btm-lnk  vi-cleanup-atwl " style="" expeccoid="2c4db542-baed-4fa5-b750-c817570a1611">                
                <a i="-99" n="Watch list" href="https://www.ebay.de/myb/WatchListAdd?_trksid=p2047675.l1360&amp;SubmitAction.AddToListVI=x&amp;item=372177206742&amp;rt=nc&amp;srt=01000400000050372e73209863317b49d8119703fe2fb3aaa8b66ea9efa1b79fe95e306c42112602d2e570dba2827bf74049b5375c671bde80c5ae3fa61295df5428006d35a0860ef486744ad734f9838bbb22dca3e30a&amp;wt=2177ec1ef2dd78e521c371b0d332689b&amp;ssPageName=VIP:watchlink:top:de&amp;sourcePage=4340" expeccoid="d4eb1d8d-9765-4648-aaad-9d0f5ace820b">
                        <span class="vi-atw-icn" expeccoid="b526871c-5f30-4701-a2b9-d15cc864a476"></span>
                        <span class="vi-atw-txt" expeccoid="2b6115ed-c546-4e37-970c-c20c58bc185d">Auf die Beobachtungsliste</span>
                        <span class="vi-rmw-txt" expeccoid="08ac389a-394c-46dc-addf-16a3689df6ad">Beobachten beenden</span>
                </a>
        </div>

        <div id="vi-atw-full" class="vi-atw-btm-lnk " style="display:none;" expeccoid="dd577995-a50f-4df5-b8fa-710fc66efb24">
                <span class="vi-atw-full-lnk" expeccoid="1de876fd-5971-46c9-a0cf-85e0424b499e">
                        <span class="vi-atw-icn" expeccoid="64a42660-c60e-47f6-8795-6b7f13becbc9"></span><span class="vi-atw-txt" expeccoid="8cc5ee62-cdcf-4084-bcfd-1a9404ac9550">Ihre Beobachtungsliste ist voll.</span>
                </span>
        </div>

        </div>


                                <div class="vi-bbox-dspn u-cb spcr" expeccoid="94ff49bd-a6bc-4178-8ab9-28a3a931ba98"></div>
                                <span class="vi-bbox-marLft20" expeccoid="48026109-1244-4da1-8c8d-01f2871cb1ca"><div class="vi-bbox-dspn u-flL lable" expeccoid="0611260a-c134-469e-acba-5cbe6bf87da3">&nbsp;</div>
<div class="vi-bbox-dspn u-flL w29" expeccoid="70167017-7185-43f9-9265-fe7dcc238cbb">&nbsp;</div>
                </span>
                                                                <div class="u-cb spcr vi-bbox-spcr22" expeccoid="4829f2b9-c5be-4231-8fbe-a696d1050512"></div>
                                                        </td>
                                                        <td expeccoid="a5388e20-8739-4594-9e3e-5afb826a6975">&nbsp;</td>
                                                </tr>
                                        </tbody></table>
                                </div><div id="why2buy" expeccoid="4dd9c815-7d7b-44ed-a621-6b7f9ec05bc4"><div class="w2b w2bsls" expeccoid="794cddbf-0231-4a1d-98f5-bf5eb9eb8d40">
    <div class="w2b-cnt w2b-3 w2b-red" expeccoid="0f21cf97-b244-4591-a91f-4aebb1be933e"><span class="w2b-sgl" expeccoid="fdedce8d-4b45-46dc-bfb1-c3bf05452897">Inlandsversand und Rücksendung kostenlos</span></div>
        <div class="w2b-cnt w2b-3 w2b-brdr" expeccoid="df7655ce-6661-46cb-8c9d-b449eb026c0f"><span class="w2b-sgl" expeccoid="9869da07-2f91-4392-971c-0d96e271d118">Versand aus Deutschland</span></div>
        <div class="w2b-cnt w2b-3 w2b-brdr" expeccoid="68101fa3-aad3-474d-8556-e1024fc63832"><span class="w2b-sgl" expeccoid="fea7ce21-3bb2-4971-b7e6-945ce8e91870">9 Beobachter</span></div>
        </div>
</div></div>
'.
    ].

    "/ without this check, the parse result would never be looked at
    self assert:doc notNil.

    "
     self new test07_badAttributeInEbayPage
    "

    "Created: / 24-05-2019 / 14:34:10 / Claus Gittinger"
! !

!HTMLParserTests class methodsFor:'documentation'!

version
    "answer the version string of this class.
     NOTE(review): the literal is a keyword placeholder, presumably expanded
     by the source code management system - to be confirmed"

    ^ '$Header$'
!

version_CVS
    "answer the CVS version string of this class.
     NOTE(review): the literal is a keyword placeholder, presumably expanded
     by the source code management system - to be confirmed"

    ^ '$Header$'
! !