compiler/PEGFsaCharacterTransition.st
changeset 515 b5316ef15274
child 523 09afcf28ed60
child 524 f6f68d32de73
equal deleted inserted replaced
502:1e45d3c96ec5 515:b5316ef15274
       
     1 "{ Package: 'stx:goodies/petitparser/compiler' }"
       
     2 
       
     3 "{ NameSpace: Smalltalk }"
       
     4 
       
     5 PEGFsaTransition subclass:#PEGFsaCharacterTransition
       
     6 	instanceVariableNames:''
       
     7 	classVariableNames:''
       
     8 	poolDictionaries:''
       
     9 	category:'PetitCompiler-FSA'
       
    10 !
       
    11 
       
    12 !PEGFsaCharacterTransition methodsFor:'accessing'!
       
    13 
       
    14 acceptsCodePoint: codePoint
       
    15     self assert: codePoint isInteger.
       
    16     (codePoint < 1) ifTrue: [ ^ false ].
       
    17     ^ characterSet at: codePoint
       
    18 !
       
    19 
       
    20 beginOfRange
       
    21     characterSet withIndexDo: [ :e :index | 
       
    22         e ifTrue: [ ^ index ]
       
    23     ].
       
    24     self error: 'should not happend'
       
    25 !
       
    26 
       
    27 character
       
    28  	self assert: (self isSingleCharacter).
       
    29 	characterSet withIndexDo: [ :e :index | e ifTrue: [ ^ Character codePoint: index ] ].
       
    30 	self error: 'should not happen'.
       
    31 !
       
    32 
       
    33 characterSet
       
    34     ^ characterSet
       
    35 !
       
    36 
       
    37 characterSet: anObject
       
    38     characterSet := anObject
       
    39 !
       
    40 
       
    41 endOfRange
       
    42     | change |
       
    43     change := false.
       
    44     characterSet withIndexDo: [ :e :index | 
       
    45         e ifTrue: [ change := true ].
       
    46         (e not and: [ change ]) ifTrue: [ ^ index - 1]
       
    47     ].
       
    48     ^ characterSet size
       
    49 !
       
    50 
       
    51 notCharacter
       
    52     self assert: self isNotSingleCharacter.
       
    53     characterSet withIndexDo: [ :value :index | value ifFalse: [ ^ Character codePoint: index ] ].
       
    54     ^ self error: 'should not happen'
       
    55 ! !
       
    56 
       
    57 !PEGFsaCharacterTransition methodsFor:'comparing'!
       
    58 
       
    59 = anotherTransition
       
    60     "
       
    61     Please note the identity comparison on destination
       
    62     If you use equality instead of identy, you will get infinite loop.
       
    63 
       
    64     So much for comparison by now :)	
       
    65     "
       
    66     super = anotherTransition ifFalse: [ ^ false ].
       
    67     (characterSet = anotherTransition characterSet) ifFalse: [ ^ false ].
       
    68     
       
    69     ^ true
       
    70 !
       
    71 
       
    72 canBeIsomorphicTo: anotherTransition
       
    73     (super canBeIsomorphicTo: anotherTransition) ifFalse: [ ^ false ].
       
    74     (characterSet = anotherTransition characterSet) ifFalse: [ ^ false ].
       
    75     
       
    76     ^ true
       
    77 !
       
    78 
       
    79 equals: anotherTransition
       
    80     (super equals: anotherTransition) ifFalse: [ ^ false ].
       
    81     (characterSet = anotherTransition characterSet) ifFalse: [ ^ false ].
       
    82 
       
    83     "JK: If character set and destination are the same, priority does not really matter"
       
    84     ^ true
       
    85 !
       
    86 
       
    87 hash
       
    88     ^ super hash bitXor: characterSet hash
       
    89 ! !
       
    90 
       
    91 !PEGFsaCharacterTransition methodsFor:'copying'!
       
    92 
       
    93 postCopy
       
    94     super postCopy.
       
    95     characterSet := characterSet copy.
       
    96 ! !
       
    97 
       
    98 !PEGFsaCharacterTransition methodsFor:'gt'!
       
    99 
       
   100 gtName
       
   101     | gtName |
       
   102     gtName := self characterSetAsString.
       
   103     priority < 0 ifTrue: [ gtName := gtName, ',', priority asString ].
       
   104     ^ gtName
       
   105 ! !
       
   106 
       
   107 !PEGFsaCharacterTransition methodsFor:'initialization'!
       
   108 
       
   109 initialize
       
   110     super initialize.
       
   111     
       
   112     characterSet := Array new: 255 withAll: false.
       
   113 ! !
       
   114 
       
   115 !PEGFsaCharacterTransition methodsFor:'modifications'!
       
   116 
       
   117 addCharacter: character
       
   118     characterSet at: character codePoint put: true
       
   119 ! !
       
   120 
       
   121 !PEGFsaCharacterTransition methodsFor:'printing'!
       
   122 
       
   123 characterSetAsString
       
   124     | stream |
       
   125     stream := WriteStream on: ''.
       
   126     self printCharacterSetOn: stream.
       
   127     ^ stream contents
       
   128 !
       
   129 
       
   130 printCharacterSetOn: stream
       
   131     (self isLetter) ifTrue: [ 
       
   132         stream nextPutAll: '#letter'.
       
   133         ^ self
       
   134     ].
       
   135 
       
   136     (self isWord) ifTrue: [ 
       
   137         stream nextPutAll: '#word'.
       
   138         ^ self
       
   139     ].
       
   140 
       
   141 
       
   142     stream nextPut: $[.
       
   143     32 to: 126 do: [ :index |
       
   144         (characterSet at: index) ifTrue: [ 
       
   145             ((Character codePoint: index) == $") ifTrue: [ 
       
   146                 stream nextPutAll: '""'.
       
   147             ] ifFalse: [ 
       
   148                 stream nextPut: (Character codePoint: index)
       
   149             ]
       
   150         ]
       
   151     ].
       
   152     stream nextPut: $].
       
   153 !
       
   154 
       
   155 printOn: stream
       
   156     self printCharacterSetOn: stream.
       
   157     stream nextPutAll: ' ('.
       
   158     priority printOn: stream.
       
   159     stream nextPutAll: ')'.		
       
   160     stream nextPutAll: '-->'.
       
   161     destination printOn: stream.
       
   162     stream nextPutAll: '(ID: '.
       
   163     stream nextPutAll: self identityHash asString.
       
   164     stream nextPutAll: ')'.
       
   165 ! !
       
   166 
       
   167 !PEGFsaCharacterTransition methodsFor:'set operations'!
       
   168 
       
   169 complement: transition
       
   170     | complement |
       
   171     complement := Array new: 255.
       
   172     
       
   173     1 to: 255 do: [ :index |
       
   174         complement
       
   175             at: index 
       
   176             put: ((self characterSet at: index) and: [(transition characterSet at: index) not])
       
   177     ].
       
   178 
       
   179     ^ complement
       
   180 !
       
   181 
       
   182 disjunction: transition
       
   183     | disjunction |
       
   184     disjunction := Array new: 255.
       
   185     
       
   186     1 to: 255 do: [ :index |
       
   187         disjunction
       
   188             at: index 
       
   189             put: ((self characterSet at: index) xor: [transition characterSet at: index])
       
   190     ].
       
   191 
       
   192     ^ disjunction
       
   193 !
       
   194 
       
   195 intersection: transition
       
   196     | intersection |
       
   197     intersection := Array new: 255.
       
   198     
       
   199     transition isPredicateTransition ifTrue: [ ^ intersection  ].
       
   200     transition isEpsilonTransition ifTrue: [ self error: 'Dont know!!' ].
       
   201     
       
   202     1 to: 255 do: [ :index |
       
   203         intersection
       
   204             at: index 
       
   205             put: ((self characterSet at: index) and: [transition characterSet at: index])
       
   206     ].
       
   207 
       
   208     ^ intersection
       
   209 !
       
   210 
       
   211 union: transition
       
   212     | union |
       
   213     union := Array new: 255.
       
   214     
       
   215     1 to: 255 do: [ :index |
       
   216         union
       
   217             at: index 
       
   218             put: ((self characterSet at: index) or: [transition characterSet at: index])
       
   219     ].
       
   220 
       
   221     ^ union
       
   222 ! !
       
   223 
       
   224 !PEGFsaCharacterTransition methodsFor:'testing'!
       
   225 
       
   226 accepts: character
       
   227     self assert: character isCharacter.
       
   228     ^ self acceptsCodePoint: character codePoint
       
   229 !
       
   230 
       
   231 isAny
       
   232     ^ characterSet allSatisfy: [ :e | e ]
       
   233 !
       
   234 
       
   235 isCharacterTransition
       
   236     ^ true
       
   237 !
       
   238 
       
   239 isDigit
       
   240     characterSet withIndexDo: [ :value :index | 
       
   241         (Character codePoint: index) isDigit == value ifFalse: [ ^ false ]
       
   242     ].
       
   243     ^ true
       
   244 !
       
   245 
       
   246 isEmpty
       
   247     ^ characterSet allSatisfy: [ :e | e not ]
       
   248 !
       
   249 
       
   250 isEpsilon
       
   251     ^ false
       
   252 !
       
   253 
       
   254 isLetter
       
   255     characterSet withIndexDo: [ :value :index | 
       
   256         (Character codePoint: index) isLetter == value ifFalse: [ ^ false ]
       
   257     ].
       
   258     ^ true
       
   259 !
       
   260 
       
   261 isNotSingleCharacter
       
   262     ^ (characterSet select: [ :e | e not ]) size == 1
       
   263 !
       
   264 
       
   265 isSingleCharacter
       
   266     ^ (characterSet select: [ :e | e ]) size == 1
       
   267 !
       
   268 
       
   269 isSingleRange
       
   270     | changes previous |
       
   271     changes := 0.
       
   272     previous := false.
       
   273     characterSet do: [ :e | 
       
   274         (e == previous) ifFalse: [ changes := changes + 1 ].
       
   275         previous := e.
       
   276     ].
       
   277     ^ changes < 3
       
   278 !
       
   279 
       
   280 isWord
       
   281     characterSet withIndexDo: [ :value :index | 
       
   282         (Character codePoint: index) isAlphaNumeric == value ifFalse: [ ^ false ]
       
   283     ].
       
   284     ^ true
       
   285 !
       
   286 
       
   287 overlapsWith: transition
       
   288     transition isCharacterTransition ifFalse: [ ^ false ].
       
   289     self isEpsilon ifTrue: [ ^ true ].
       
   290     transition isEpsilon ifTrue: [ ^ true ].
       
   291     
       
   292     ^ (self intersection: transition) anySatisfy: [ :bool | bool ]
       
   293 ! !
       
   294 
       
   295 !PEGFsaCharacterTransition methodsFor:'transformation'!
       
   296 
       
   297 join: transition
       
   298     ^ self join: transition joinDictionary: Dictionary new.
       
   299 !
       
   300 
       
   301 join: transition joinDictionary: dictionary
       
   302     | newDestination newTransition |
       
   303 "	pair := PEGFsaPair with: self with: transition.
       
   304     (dictionary includesKey: pair) ifTrue: [ ^ dictionary at: pair ].
       
   305     dictionary at: pair put: nil.
       
   306 "	
       
   307     newDestination := self destination join: transition destination joinDictionary: dictionary.
       
   308     newDestination isNil ifTrue: [ self error: 'What a cycle!! I wonder, how does this happened!!' ].
       
   309     
       
   310     newTransition := PEGFsaCharacterTransition new.
       
   311     newTransition destination: newDestination.
       
   312     newTransition characterSet: (self intersection: transition).
       
   313     newTransition priority: (self priority min: transition priority).
       
   314     
       
   315 "	^ dictionary at: pair put: newTransition"
       
   316     ^ newTransition 
       
   317 !
       
   318 
       
   319 mergeWith: transition
       
   320     | union |
       
   321     self assert: destination = transition destination.
       
   322     
       
   323     union := self union: transition.
       
   324     self characterSet: union
       
   325 ! !
       
   326