equal
deleted
inserted
replaced
800 "given some HTML, extract the raw text. |
800 "given some HTML, extract the raw text. |
801 Can be used to search for strings in some html text." |
801 Can be used to search for strings in some html text." |
802 |
802 |
803 |parser doc s first| |
803 |parser doc s first| |
804 |
804 |
805 |
|
806 parser := HTMLParser new. |
805 parser := HTMLParser new. |
807 doc := parser parseText:htmlString. |
806 doc := parser parseText:htmlString. |
808 s := CharacterWriteStream on:(String new:100). |
807 s := CharacterWriteStream on:(String new:100). |
809 first := true. |
808 first := true. |
810 doc markUpElementsDo:[:el | |
809 doc markUpElementsDo:[:el | |
826 ]. |
825 ]. |
827 ^ s contents |
826 ^ s contents |
828 |
827 |
829 " |
828 " |
830 self plainTextOfHTML:' |
829 self plainTextOfHTML:' |
831 bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6 |
830 bla1 bla2 <br>bla3 <table><tr><td>bla4</td></tr></table> bla5<p>bla6' |
832 ' |
831 self plainTextOfHTML:'Hello World' |
833 " |
832 " |
834 |
833 |
835 "Modified: / 06-05-2015 / 17:02:36 / sr" |
834 "Modified: / 06-05-2015 / 17:02:36 / sr" |
836 ! ! |
835 ! ! |
837 |
836 |