RegressionTests__BayesClassifierTest.st
author Claus Gittinger <cg@exept.de>
Tue, 09 Jul 2019 18:53:03 +0200
changeset 2327 bf482d49aeaf
parent 1447 2351db93aa5b
permissions -rw-r--r--
#QUALITY by exept class: RegressionTests::StringTests added: #test82c_expanding
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1297
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"{ Encoding: utf8 }"
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
1447
2351db93aa5b package changes
Claus Gittinger <cg@exept.de>
parents: 1297
diff changeset
     3
"{ Package: 'stx:goodies/regression' }"
1297
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
"{ NameSpace: RegressionTests }"
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
"Test case class for BayesClassifier; it declares no instance, class or pool variables."
TestCase subclass:#BayesClassifierTest
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text-Support'
!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
!BayesClassifierTest class methodsFor:'documentation'!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
documentation
"
    Unit tests for BayesClassifier.
    The tests in this class send dehyphenate: with a collection of lines,
    and classify: / classify:asCategory: with plain text strings.

    [author:]
        cg

    [instance variables:]

    [class variables:]

    [see also:]
        BayesClassifier
"
! !
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
!BayesClassifierTest methodsFor:'tests'!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
test02_dehyphenate
    "Checks dehyphenate: against three inputs, each given as a collection of lines:
       1) a line of digits ending in a hyphen is expected to stay separate
          from the following line (the hyphen is kept);
       2) a line of letters ending in a hyphen is expected to be joined with
          the following line, with the hyphen removed;
       3) a line ending in a hyphen that is preceded by a space is expected
          to stay unchanged.
     In all three cases the expected result contains no element for the
     leading whitespace-only line of the input."

    |b s l|

    "/ case 1: digits before the trailing hyphen - lines are not joined
    s := '    
1 2 3 4
5 6 7 8-
9 10 11 12
'.
    b := BayesClassifier new.
    l := b dehyphenate:s asCollectionOfLines.
    self assert:(l asArray = #('1 2 3 4' '5 6 7 8-' '9 10 11 12')).

    "/ case 2: letter directly before the trailing hyphen - lines are joined
    s := '    
a b c d
e f g h-
i j k l
'.
    b := BayesClassifier new.
    l := b dehyphenate:s asCollectionOfLines.
    self assert:(l asArray = #('a b c d' 'e f g hi j k l')).

    "/ case 3: space before the trailing hyphen - lines are not joined
    s := '    
a b c d
e f g h -
i j k l
'.
    b := BayesClassifier new.
    l := b dehyphenate:s asCollectionOfLines.
    self assert:(l asArray = #('a b c d' 'e f g h -' 'i j k l')).
    "
     self run:#test02_dehyphenate
     self new test02_dehyphenate
    "
!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    74
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    75
test03_extractWords
    "Sends classify: with a longer German text (including URLs and punctuation)
     to a freshly created, untrained BayesClassifier.
     The result is discarded; the test only fails if the call raises an error.

     NOTE(review): despite its name, this test never calls a word-extraction
     method and asserts nothing; its body is the same as xtest04_classify.
     Confirm the intended check against BayesClassifier and add an assertion."

    |b s|

    s := '    
Sieh dir z.B. mal an, was in Deutschland an Rechtsbeugung los ist. Da bekommt in systemrelevanten Verfahren in der Regel der Richter vor dem Prozess mitgeteilt (vom Präsidenten oder seinem Vize), wie das Verfahren ausgehen soll. Und wenn danach Rechtsbeugung erwiesen ist, so darf man das Gericht noch nicht einmal verklagen, sondern muss den Einzelrichter bzw. Senat verklagen, der das Urteil ausgesprochen hat. Und wo muss man den Richter verklagen? An demselben Gericht das ihm zuvor die Anordnung zur Rechtsbeugung erteilt hat. Da ein und dasselbe Gericht das die Rechtsbeugung begangen hat, darüber entscheidet ob es Rechtsbeugung begangen hat, sind in der BRD noch nicht einmal die primitivsten Grundsätze der Gewaltenteilung gegeben. 
Und dann natürlich immer wieder die Wahlfälschungen. Hier geht es zu wie in einer Bananenrepublik. Nur ein Beispiel von vielen
http://www.welt.de/politik/deutschland/article150236737/Schueler-zaehlten-falsch...
Und die Pressezensur wird auch immer schlimmer. Wer anderer Meinung ist möge mir bitte einmal ein regierungskritisches Massenmedium nennen, egal ob Fernsehsender, Radiosender, Zeitschrift ...
Und dann natürlich die Vernichtung der im Grundgesetz garantierten Rechte auf das Postgeheimnis und Fernmeldegeheimnis. Seit die Regierung bei mir rumschnüffelt, ist mein Rechner echt langsam geworden.
https://de.wikipedia.org/wiki/Fernmeldegeheimnis
O.k. ist besser als von Islamisten ermordet zu werden, aber ich bin kein Islamist und stehe bei niemandem in Verdacht einer zu sein - also was soll diese Überwachung?!! Ach so, ich habe mal geschrieben wie lächerlich der Vergewaltigungsvorwurf gegen Julian Assange ist, ja dann natürlich ...
'.
    b := BayesClassifier new.
    b classify:s.

    "
     self run:#test03_extractWords
     self new test03_extractWords
    "
!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
test06_classify
    "Trains a fresh BayesClassifier with two phrases in category positive,
     one in category negative and one in category neutral, then asserts that
     a phrase not seen during training is classified as positive."

    |b|

    b := BayesClassifier new.
    "/teach it positive phrases
    b classify:'amazing, awesome movie!!!! Yeah!!!!' asCategory: 'positive'.
    b classify:'Sweet, this is incredibly, amazing, perfect, great!!!!' asCategory: 'positive'.

    "/teach it a negative phrase
    b classify:'terrible, shitty thing. Damn. Sucks!!!!' asCategory: 'negative'.

    "/teach it a neutral phrase
    b classify:'I dont really know what to make of this.' asCategory: 'neutral'.

    "/now test it to see that it correctly categorizes a new document
    self assert:(b classify:'awesome, cool, amazing!!!! Yay.')= 'positive'.

    "
     self run:#test06_classify
     self new test06_classify
    "
!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   113
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   114
xtest04_classify
    "Sends classify: with a longer German text (including URLs and punctuation)
     to a freshly created, untrained BayesClassifier.
     The result is discarded and nothing is asserted.

     NOTE(review): the leading x in the selector presumably keeps this method
     out of the automatically collected test selectors - confirm.
     Its body is the same as test03_extractWords."

    |b s|

    s := '    
Sieh dir z.B. mal an, was in Deutschland an Rechtsbeugung los ist. Da bekommt in systemrelevanten Verfahren in der Regel der Richter vor dem Prozess mitgeteilt (vom Präsidenten oder seinem Vize), wie das Verfahren ausgehen soll. Und wenn danach Rechtsbeugung erwiesen ist, so darf man das Gericht noch nicht einmal verklagen, sondern muss den Einzelrichter bzw. Senat verklagen, der das Urteil ausgesprochen hat. Und wo muss man den Richter verklagen? An demselben Gericht das ihm zuvor die Anordnung zur Rechtsbeugung erteilt hat. Da ein und dasselbe Gericht das die Rechtsbeugung begangen hat, darüber entscheidet ob es Rechtsbeugung begangen hat, sind in der BRD noch nicht einmal die primitivsten Grundsätze der Gewaltenteilung gegeben. 
Und dann natürlich immer wieder die Wahlfälschungen. Hier geht es zu wie in einer Bananenrepublik. Nur ein Beispiel von vielen
http://www.welt.de/politik/deutschland/article150236737/Schueler-zaehlten-falsch...
Und die Pressezensur wird auch immer schlimmer. Wer anderer Meinung ist möge mir bitte einmal ein regierungskritisches Massenmedium nennen, egal ob Fernsehsender, Radiosender, Zeitschrift ...
Und dann natürlich die Vernichtung der im Grundgesetz garantierten Rechte auf das Postgeheimnis und Fernmeldegeheimnis. Seit die Regierung bei mir rumschnüffelt, ist mein Rechner echt langsam geworden.
https://de.wikipedia.org/wiki/Fernmeldegeheimnis
O.k. ist besser als von Islamisten ermordet zu werden, aber ich bin kein Islamist und stehe bei niemandem in Verdacht einer zu sein - also was soll diese Überwachung?!! Ach so, ich habe mal geschrieben wie lächerlich der Vergewaltigungsvorwurf gegen Julian Assange ist, ja dann natürlich ...
'.
    b := BayesClassifier new.
    b classify:s.

    "
     self run:#xtest04_classify
     self new xtest04_classify
    "
! !
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
!BayesClassifierTest class methodsFor:'documentation'!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
version
    "Answers the version string of this class.
     The literal is a keyword placeholder, presumably expanded by the
     source code management system on checkin - do not edit it by hand."

    ^ '$Header$'
!
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   140
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   141
version_CVS
    "Answers the CVS version string of this class.
     The literal is a keyword placeholder, presumably expanded by CVS
     on checkin - do not edit it by hand."

    ^ '$Header$'
! !
447af378b887 initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144