StringUtilities.st
author Claus Gittinger <cg@exept.de>
Tue, 18 Dec 2018 12:52:15 +0100
changeset 4777 b22943151ce0
parent 3649 2a8bc2db1928
child 4938 48be5b40e700
permissions -rw-r--r--
#DOCUMENTATION by cg class: ZipStream class comment/format in: #compress:into: #uncompress:into:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     1
"{ Encoding: utf8 }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     2
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     3
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     4
 COPYRIGHT (c) 1994 by Claus Gittinger
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     5
 COPYRIGHT (c) 2009 by eXept Software AG
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     6
              All Rights Reserved
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     7
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     8
 This software is furnished under a license and may be used
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
     9
 only in accordance with the terms of that license and with the
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    10
 inclusion of the above copyright notice.   This software may not
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    11
 be provided or otherwise made available to, or used by, any
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    12
 other person.  No title to or ownership of the software is
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    13
 hereby transferred.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    14
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    15
"{ Package: 'stx:libbasic2' }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    16
3649
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
    17
"{ NameSpace: Smalltalk }"
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
    18
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    19
Object subclass:#StringUtilities
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    20
	instanceVariableNames:''
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    21
	classVariableNames:''
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    22
	poolDictionaries:''
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    23
	category:'Collections-Text-Support'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    24
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    25
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    26
!StringUtilities class methodsFor:'documentation'!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    27
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    28
copyright
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    29
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    30
 COPYRIGHT (c) 1994 by Claus Gittinger
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    31
 COPYRIGHT (c) 2009 by eXept Software AG
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    32
              All Rights Reserved
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    33
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    34
 This software is furnished under a license and may be used
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    35
 only in accordance with the terms of that license and with the
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    36
 inclusion of the above copyright notice.   This software may not
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    37
 be provided or otherwise made available to, or used by, any
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    38
 other person.  No title to or ownership of the software is
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    39
 hereby transferred.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    40
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    41
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    42
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    43
documentation
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    44
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    45
    some less often used algorithms have been moved to here to
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    46
    make libbasic more compact.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    47
"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    48
! !
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    49
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    50
!StringUtilities class methodsFor:'edit distance'!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    51
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    52
editDistanceFrom:s1 to:s2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    53
    "another, simpler editing distance between two strings. 
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    54
     See also: levenshtein"
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    55
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    56
    |editedS2 min d|
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    57
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    58
    s2 size > s1 size ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    59
        ^ self editDistanceFrom:s2 to:s1 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    60
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    61
    s1 size > s2 size ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    62
        0 to:s2 size do:[:pos |
3113
a65ead014560 class: StringUtilities
Claus Gittinger <cg@exept.de>
parents: 2853
diff changeset
    63
            editedS2 := s2 copyWith:$# insertedAfterIndex:pos.
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    64
            
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    65
            d := self editDistanceFrom:s1 to:editedS2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    66
            min := (min ? d) min:d.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    67
        ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    68
        ^ min + insrtWeight
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    69
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    70
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    71
    ^ (1 to:s1 size) sum:
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    72
        [:i | 
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    73
            ((s2 at:i) == $# or:[ (s1 at:i)=(s2 at:i)]) 
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    74
                ifTrue:[0] 
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    75
                ifFalse:[substWeight]
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    76
        ] 
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    77
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    78
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    79
     'comptuer' levenshteinTo:'computer'      
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    80
     self editDistanceFrom:'comptuer' to:'computer' s:4 k:2 c:1 e:nil i:2        
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    81
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    82
     'computr' levenshteinTo:'computer'                                    
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    83
     self editDistanceFrom:'computr' to:'computer' s:4 k:2 c:1 e:nil i:2        
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    84
    "
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    85
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
    86
    "Modified (format): / 09-08-2012 / 05:41:59 / cg"
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    87
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    88
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    89
isKey:k1 nextTo:k2
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    90
    "return true, if k1 and k2 are adjacent keys on the keyboard.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    91
     This is used to specially prioritize plausible typing errors of adjacent keys."
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    92
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    93
    ^ self isKey:k1 nextTo:k2 onKeyboard:(self keyboardLayout)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    94
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    95
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    96
     self isKey:$a nextTo:$a   
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    97
     self isKey:$a nextTo:$s   
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    98
     self isKey:$a nextTo:$q   
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
    99
     self isKey:$a nextTo:$w    
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   100
     self isKey:$a nextTo:$y    
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   101
     self isKey:$a nextTo:$z    
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   102
     self isKey:$a nextTo:$x    
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   103
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   104
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   105
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   106
isKey:k1 nextTo:k2 onKeyboard:keys
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   107
    "return true, if k1 and k2 are adjacent keys on the keyboard defined by keys"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   108
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   109
    |row1 row2 col1 col2|
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   110
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   111
    row1 := keys findFirst:[:eachRow | col1 := eachRow indexOf:k1. col1 ~~ 0].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   112
    row1 == 0 ifTrue:[^ false].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   113
    row2 := keys findFirst:[:eachRow | col2 := eachRow indexOf:k2. col2 ~~ 0].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   114
    row2 == 0 ifTrue:[^ false].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   115
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   116
    ^ (row1-row2) abs <= 1 and:[(col1-col2) abs <= 1]
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   117
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   118
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   119
     self isKey:$a nextTo:$q onKeyboard:(StringUtilities keyboardLayoutForLanguage:#de)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   120
     self isKey:$a nextTo:$x onKeyboard:(StringUtilities keyboardLayoutForLanguage:#de)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   121
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   122
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   123
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   124
keyboardLayout
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   125
    "the keyboard layout (used with algorithms to find possible typing errors,
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   126
     for example: edit distance in levenshtein)"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   127
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   128
    ^ self keyboardLayoutForLanguage:(UserPreferences current language)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   129
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   130
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   131
     self keyboardLayout
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   132
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   133
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   134
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   135
keyboardLayoutForLanguage:lang
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   136
    "the keyboard layout (used with algorithms to find possible typing errors,
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   137
     for example: edit distance in levenshtein).
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   138
     CAVEAT: hard coded keyboard layouts (US, German and several others) here - should go into a resource file."
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   139
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   140
    "/ danish
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   141
    lang == #da ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   142
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   143
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   144
               '*qwertyuiopå'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   145
               '**asdfghjklæø'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   146
               '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   147
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   148
    lang == #no ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   149
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   150
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   151
               '*qwertyuiopå'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   152
               '**asdfghjkløæ'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   153
               '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   154
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   155
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   156
    (lang == #de or:[lang == #pl or:[lang == #cz]]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   157
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   158
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   159
               '*qwertzuiop'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   160
               '**asdfghjkl:'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   161
               '***yxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   162
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   163
    lang == #hu ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   164
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   165
               '1234567890-'                         
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   166
               '*qwertyuiopõú'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   167
               '**asdfghjkléáũ'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   168
               '**ízxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   169
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   170
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   171
    lang == #pt ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   172
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   173
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   174
               '*qwertyuiop'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   175
               '**asdfghjklç'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   176
               '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   177
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   178
    lang == #es ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   179
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   180
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   181
               '*qwertyuiop'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   182
               '**asdfghjklñ'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   183
               '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   184
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   185
    (lang == #sv or:[lang == #fi]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   186
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   187
               '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   188
               '*qwertyuiopå'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   189
               '**asdfghjklöä'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   190
               '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   191
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   192
    lang == #fr ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   193
        ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   194
               '1234567890'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   195
               '*azertyuiop'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   196
               '**qsdfghjklm'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   197
               '***wxcvbn,' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   198
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   199
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   200
    "/ fallback: english, dutch, italian
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   201
    ^ #( 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   202
           '1234567890-'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   203
           '*qwertyuiop'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   204
           '**asdfghjkl:'
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   205
           '***zxcvbnm' ).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   206
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   207
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   208
     self keyboardLayoutForLanguage:#de 
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   209
    "
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   210
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   211
    "Modified (comment): / 09-08-2012 / 05:39:19 / cg"
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   212
!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   213
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   214
levenshteinDistanceFrom:string1 to:string2 s:substWeight k:kbdTypoWeight c:caseWeight e:exchangeWeight i:insrtWeight d:deleteWeight
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   215
    "parametrized levenshtein.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   216
     return the levenshtein distance of two strings;
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   217
     this value corresponds to the number of replacements that have to be
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   218
     made to get string2 from string1. The smaller the returned number,
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   219
     the more similar the two strings are.
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   220
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   221
     The arguments are the costs for
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   222
        s:substitution,
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   223
        k:keyboard typo (substitution),   if nil, s is used
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   224
        c:case-change,                    if nil, s is used
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   225
        i:insertion
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   226
        d:deletion
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   227
        e:exchange                        if nil, s*2 is used
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   228
     of a character.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   229
     See IEEE transactions on Computers 1976 Pg 172 ff."
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   230
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   231
    |d  "delta matrix"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   232
     len1 "{ Class: SmallInteger }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   233
     len2 "{ Class: SmallInteger }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   234
     dim  "{ Class: SmallInteger }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   235
     prevRow row col
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   236
     dimPlus1 "{ Class: SmallInteger }"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   237
     min pp c1 c2|
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   238
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   239
    len1 := string1 size.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   240
    len2 := string2 size.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   241
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   242
    "create the help-matrix"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   243
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   244
    dim := len1 max:len2.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   245
    dimPlus1 := dim + 1.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   246
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   247
    d := Array new:dimPlus1.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   248
    1 to:dimPlus1 do:[:i |
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   249
        d at:i put:(Array new:dimPlus1)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   250
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   251
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   252
    "init help-matrix"
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   253
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   254
    (d at:1) at:1 put:0.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   255
    row := d at:1.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   256
    1 to:dim do:[:j |
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   257
        row at:(j + 1) put:( (row at:j) + insrtWeight )
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   258
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   259
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   260
    1 to:dim do:[:i |
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   261
        (d at:(i + 1)) at:1 put:(  ((d at:i) at:1) + deleteWeight )
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   262
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   263
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   264
    1 to:len1 do:[:i |
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   265
        c1 := string1 at:i.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   266
        1 to:len2 do:[:j |
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   267
            c2 := string2 at:j.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   268
            (c1 == c2) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   269
                pp := 0
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   270
            ] ifFalse:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   271
                (c1 asLowercase == c2 asLowercase) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   272
                    pp := caseWeight
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   273
                ] ifFalse:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   274
                    pp := substWeight.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   275
                    substWeight ~~ kbdTypoWeight ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   276
                        (self isKey:c1 asLowercase nextTo:c2 asLowercase) ifTrue:[
3181
adc9d827b4b4 class: StringUtilities
Claus Gittinger <cg@exept.de>
parents: 3113
diff changeset
   277
                            pp := kbdTypoWeight ? substWeight.
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   278
                        ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   279
                    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   280
                    "/ assuming (;-) that an exchange is very common when typing...
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   281
                    exchangeWeight notNil ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   282
                        (i < len1 and:[j < len2]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   283
                            ((string1 at:i) = (string2 at:(j+1))
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   284
                            and:[ (string1 at:i+1) = (string2 at:j) ]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   285
                                pp := exchangeWeight.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   286
                            ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   287
                        ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   288
                        (i > 1 and:[j > 1]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   289
                            ((string1 at:i) = (string2 at:(j-1))
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   290
                            and:[ (string1 at:i-1) = (string2 at:j) ]) ifTrue:[
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   291
                                pp := exchangeWeight.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   292
                            ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   293
                        ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   294
                    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   295
                ]
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   296
            ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   297
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   298
            prevRow := d at:i.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   299
            row := d at:(i + 1).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   300
            col := j + 1.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   301
            min := (prevRow at:j) + pp.
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   302
            min := min min:( (row at:j) + insrtWeight).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   303
            min := min min:( (prevRow at:col) + deleteWeight).
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   304
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   305
            row at:col put: min
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   306
        ]
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   307
    ].
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   308
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   309
    ^ (d at:(len1 + 1)) at:(len2 + 1)
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   310
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   311
    "
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   312
     'comptuer' levenshteinTo:'computer'       
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   313
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   314
     self levenshteinDistanceFrom:'comptuer' to:'computer' 
2853
217a303f28d1 class: StringUtilities
Claus Gittinger <cg@exept.de>
parents: 2804
diff changeset
   315
            s:4 k:2 c:1 e:nil i:2 d:6    
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   316
    "
2804
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   317
89a55abc0ff2 comment/format in:
Claus Gittinger <cg@exept.de>
parents: 2214
diff changeset
   318
    "Modified (comment): / 09-08-2012 / 05:40:08 / cg"
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   319
! !
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   320
3649
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   321
!StringUtilities class methodsFor:'queries'!
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   322
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   323
isUtilityClass
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   324
    "Class-side query: answer true if the receiver is the StringUtilities class
     itself (identity comparison), false for any other receiver."
    ^ self == StringUtilities
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   325
! !
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   326
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   327
!StringUtilities class methodsFor:'documentation'!
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   328
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   329
version
3649
2a8bc2db1928 #DOCUMENTATION
Claus Gittinger <cg@exept.de>
parents: 3181
diff changeset
   330
    "Answer the version string recorded for this source file.
     NOTE(review): the returned literal looks like an unexpanded version-control
     keyword placeholder - confirm which tool (if any) substitutes it."
    ^ '$Header$'
2214
9523a5ed7d8a initial checkin
Claus Gittinger <cg@exept.de>
parents:
diff changeset
   331
! !
3113
a65ead014560 class: StringUtilities
Claus Gittinger <cg@exept.de>
parents: 2853
diff changeset
   332