author | Jan Vrany <jan.vrany@fit.cvut.cz> |
Wed, 19 Jul 2017 09:42:32 +0200 | |
branch | jv |
changeset 17619 | edb119820fcb |
parent 17132 | 17d361c666c2 |
permissions | -rw-r--r-- |
16884 | 1 |
"{ Encoding: utf8 }" |
2 |
||
10014 | 3 |
" |
10089 | 4 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague |
5 |
Copyright (c) 2009-2010 eXept Software AG |
|
10014 | 6 |
|
10089 | 7 |
Permission is hereby granted, free of charge, to any person |
8 |
obtaining a copy of this software and associated documentation |
|
9 |
files (the 'Software'), to deal in the Software without |
|
10 |
restriction, including without limitation the rights to use, |
|
11 |
copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
12 |
copies of the Software, and to permit persons to whom the |
|
13 |
Software is furnished to do so, subject to the following |
|
14 |
conditions: |
|
15 |
||
16 |
The above copyright notice and this permission notice shall be |
|
17 |
included in all copies or substantial portions of the Software. |
|
18 |
||
19 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, |
|
20 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
21 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
22 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
23 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
24 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
25 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
26 |
OTHER DEALINGS IN THE SOFTWARE. |
|
10014 | 27 |
" |
28 |
"{ Package: 'stx:libtool' }" |
|
29 |
||
16884 | 30 |
"{ NameSpace: Smalltalk }" |
31 |
||
10014 | 32 |
Object subclass:#Diff
    instanceVariableNames:'equivMax heuristic nodiscards xvec yvec fdiag bdiag fdiagoff
        bdiagoff filevec cost snakeLimit inhibit'
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Support'
10014 | 38 |
! |
39 |
||
40 |
Link subclass:#Change
    instanceVariableNames:'inserted deleted line0 line1'
    classVariableNames:''
    poolDictionaries:''
    privateIn:Diff
|
45 |
! |
|
46 |
||
47 |
Object subclass:#Data
    instanceVariableNames:'bufferedLines equivs undiscarded realindexes nondiscardedLines
        changedFlag'
    classVariableNames:''
    poolDictionaries:''
    privateIn:Diff
|
53 |
! |
|
54 |
||
55 |
Object subclass:#ForwardScript
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    privateIn:Diff
|
60 |
! |
|
61 |
||
62 |
Object subclass:#ReverseScript
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    privateIn:Diff
|
67 |
! |
|
68 |
||
69 |
!Diff class methodsFor:'documentation'! |
|
70 |
||
71 |
copyright |
|
72 |
" |
|
10089 | 73 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague |
74 |
Copyright (c) 2009-2010 eXept Software AG |
|
75 |
||
76 |
Permission is hereby granted, free of charge, to any person |
|
77 |
obtaining a copy of this software and associated documentation |
|
78 |
files (the 'Software'), to deal in the Software without |
|
79 |
restriction, including without limitation the rights to use, |
|
80 |
copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
81 |
copies of the Software, and to permit persons to whom the |
|
82 |
Software is furnished to do so, subject to the following |
|
83 |
conditions: |
|
10014 | 84 |
|
10089 | 85 |
The above copyright notice and this permission notice shall be |
86 |
included in all copies or substantial portions of the Software. |
|
87 |
||
88 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, |
|
89 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
90 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
91 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
92 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
93 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
94 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
95 |
OTHER DEALINGS IN THE SOFTWARE. |
|
10014 | 96 |
" |
97 |
! |
|
98 |
||
99 |
documentation |
|
100 |
" |
|
101 |
I'm standard diff implementation written purely in Smalltalk. I can |
|
102 |
compute differences between two sequenceable collections, not necessarily |
|
103 |
holding strings. Elements are compared using #=. |
|
104 |
||
105 |
Result of comparison is an edit script, a linked list of Diff::Changes, |
|
106 |
each keeping one difference: whether change is insert and/or delete, |
|
107 |
and positions in A and B. |
|
108 |
||
109 |
I'm a port of Java diff. |
|
110 |
||
111 |
[author:] |
|
112 |
Jakub Zelenka (zelenj7@fel.cvut.cz) |
|
113 |
Vladislav Skoumal (skoumal@skoumal.net) |
|
114 |
Jan Vrany (jan.vrany@fit.cvut.cz) |
|
115 |
||
116 |
[instance variables:] |
|
117 |
||
118 |
[class variables:] |
|
119 |
||
120 |
[see also:] |
|
121 |
||
122 |
" |
|
123 |
! |
|
124 |
||
125 |
documentation_czech |
|
126 |
" |
|
16884 | 127 |
první fáze:
10014 | 128 |
############################################################################################################################# |
129 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
|
130 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
131 |
############################################################################################################################ |
|
16884 | 132 |
First a second představujou dvě pole, které chceme porovnávat. Jednotlivé položky v poli si lze představit jako řádky, případně jako slova v řádku. |
133 |
Podle toho, co je potřeba porovnávat. |
|
10014 | 134 |
|
135 |
***************************************************************************************************************************** |
|
136 |
diff := FelDiff new felDiff. |
|
137 |
***************************************************************************************************************************** |
|
16884 | 138 |
Zde probíhá inicializace defaultních proměnných. Funguje to jako konstruktor.
10014 | 139 |
|
140 |
############################################################################################################################ |
|
141 |
diff diff: first b: second |
|
142 |
############################################################################################################################ |
|
16884 | 143 |
První fáze nutná pro porovnávání polí. Vzniknou dvě instance třídy filedata uložené do pole. Tyto instance budou obsahovat následující údaje:
10014 | 144 |
|
145 |
filevec[1].equivs=#(1 2 3 3 4 5 6) |
|
146 |
filevec[1].bufferedLines=7 |
|
147 |
filevec[1].changedFlag=#() |
|
148 |
||
149 |
filevec[2].equivs=#(1 3 6 7 3 8 9 10 10 11 12 13 14 15 10 10) |
|
150 |
filevec[2].bufferedLines=16 |
|
151 |
filevec[2].changedFlag=#() |
|
152 |
||
16884 | 153 |
V zásadě se vytvořila struktura Dictionary, která jednotlivé řádky(slova) převedla na čísla. Pole equivs pak představuje číselně slova(řádky). |
154 |
Čísla, která se nalézají v obou dvou polích equivs značí, že soubory sdílí alespoň nějaké slovo(řádek). |
|
10014 | 155 |
|
156 |
***************************************************************************************************************************** |
|
157 |
change:= diff diff2: true. |
|
158 |
***************************************************************************************************************************** |
|
159 |
||
16884 | 160 |
Zde již dochází k porovnání obou dvou polí s řádky(slovy). Lze si vybrat mezi forwardscriptem a reversescriptem.
10014 | 161 |
|
162 |
1) metoda discardconfusinglines |
|
16884 | 163 |
výsledek: |
10014 | 164 |
filevec[1].undiscardeded=#(1 3 3 5 6 0 0) |
165 |
filevec[1].realIndexes= #(0 2 3 5 6 0 0) |
|
166 |
filevec[1].nondiscardedLines=5 |
|
167 |
filevec[1].changedFlag=#(false false true false false true false false false) |
|
168 |
||
169 |
filevec[2].undiscardeded=#(1 3 5 6 3 0 0 0 0 0 0 0 0 0 0 0) |
|
170 |
filevec[2].realIndexes= #(0 1 2 3 4 0 0 0 0 0 0 0 0 0 0 0) |
|
171 |
filevec[2].nondiscardedLines=5 |
|
172 |
||
16884 | 173 |
Undiscarded- Řádky souborů, které jsou shodné. |
174 |
RealIndexes - indexy řádků v poli(je potřeba přičíst jedna) |
|
175 |
- to znamená že index prvního 3->3 pozice v prvním vstupním poli |
|
176 |
- index druhého 3->2 pozice v druhém vstupním poli a 3->5 pozice v druhém vstupním poli |
|
177 |
NondiscardedLines- značí kolik řádků(slov) je shodných v obou polích. |
|
10014 | 178 |
|
16884 | 179 |
2)Následuje porovnávání jednotlivých polí a vytvoření výsledku
10014 | 180 |
|
16884 | 181 |
3)Výsledek Reverse skript |
10014 | 182 |
|
183 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa') |
|
184 |
deleted=0 |
|
185 |
line0=7('ruzovy') |
|
186 |
line1=4('ruzovy') |
|
187 |
link=next |
|
188 |
||
16884 | 189 |
inserted- kolik znaků bylo vloženo
190 |
deleted - kolik znaků bylo smazáno |
|
191 |
line0 - pořadí znaku za kterým bylo něco vloženo(smazáno) v prvním poli(poslední znak který je shodný v obou polích) |
|
192 |
line1 - pořadí znaku za kterým bylo něco smazáno(vloženo) v prvním poli(poslední znak který je shodný v obou polích) |
|
10014 | 193 |
|
16884 | 194 |
Takže po znaku na pozici 4, je 12 vložených znaků oproti prvnímu
10014 | 195 |
|
16884 | 196 |
Zbytek pole vypadá takto: |
10014 | 197 |
|
198 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
|
199 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
200 |
||
16884 | 201 |
link není null a tudíž odkazuje na další informace o změnách.
10014 | 202 |
inserted=0 |
203 |
deleted=2('treti' 'paty') |
|
204 |
line0=3('treti') |
|
205 |
line1=2('treti') |
|
206 |
link=next |
|
207 |
||
16884 | 208 |
zbytek pole vypadá takto: |
10014 | 209 |
first := #('prvni' 'druhy' 'treti' 'zeleny' 'ruzovy' ). |
210 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
211 |
||
16884 | 212 |
link není null a tudíž odkazuje na další informace o změnách.
10014 | 213 |
inserted=0 |
214 |
deleted=1('druhy') |
|
215 |
line0=1('prvni') |
|
216 |
line1=1('prvni') |
|
217 |
link=nil |
|
218 |
||
16884 | 219 |
zbytek pole vypadá takto: |
10014 | 220 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
221 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
222 |
||
16884 | 223 |
link je nil. Neexistuje žádná změna a tato pole jsou shodná.
10014 | 224 |
|
16884 | 225 |
4)Výsledek Forward skript |
10014 | 226 |
|
227 |
inserted=0 |
|
228 |
deleted=1('druhy') |
|
229 |
line0=1('prvni') |
|
230 |
line1=1('prvni') |
|
231 |
link=next |
|
232 |
||
16884 | 233 |
zbytek pole vypadá takto: |
10014 | 234 |
first := #('prvni' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
235 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
236 |
||
16884 | 237 |
link není nil jdeme na odkaz:
10014 | 238 |
inserted=0 |
239 |
deleted=2('treti' 'paty') |
|
240 |
line0=3('treti') |
|
241 |
line1=2('treti') |
|
242 |
link=next |
|
243 |
||
16884 | 244 |
zbytek pole vypadá takto: |
10014 | 245 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
246 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
247 |
||
16884 | 248 |
link není nil jdeme na odkaz:
10014 | 249 |
|
250 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa') |
|
251 |
deleted=0 |
|
252 |
line0=7('ruzovy') |
|
253 |
line1=4('ruzovy') |
|
254 |
link=nil |
|
255 |
||
16884 | 256 |
zbytek pole vypadá takto: |
10014 | 257 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
258 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
259 |
||
260 |
Konec |
|
261 |
" |
|
262 |
! ! |
|
263 |
||
264 |
!Diff class methodsFor:'instance creation'! |
|
265 |
||
266 |
new
    "Create an instance with basicNew, send it #initialize and answer
     whatever #initialize answers (the initialized instance)."

    | fresh |

    fresh := self basicNew.
    ^ fresh initialize.
|
270 |
! ! |
|
271 |
||
272 |
!Diff class methodsFor:'diffing'! |
|
273 |
||
274 |
between: a and: b
    "Convenience entry point: compare a with b and answer the result of
     #between:and:reverse: with the reverse flag switched off."

    ^ self
        between: a
        and: b
        reverse: false

    "Created: / 16-02-2010 / 23:08:55 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
279 |
! |
|
280 |
||
281 |
between: a and: b reverse: reverse
    "Set up a new differ for the collections a and b and answer the edit
     script produced by #diff:. The boolean reverse is handed to #diff:
     unchanged and selects the kind of script builder there."

    | differ |

    differ := self new.
    differ a: a b: b.
    ^ differ diff: reverse

    "Created: / 16-02-2010 / 23:04:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
288 |
! ! |
|
289 |
||
290 |
!Diff methodsFor:'diffing'! |
|
291 |
||
292 |
a:gA b:gB
    "Prepare the receiver for comparing the two collections gA and gB.

     For each collection a Data object is created, set up via #fileData and
     #fileData:hashTable:felDiff: and stored in filevec (slot 1 for gA,
     slot 2 for gB). Both Data objects are handed the same Dictionary, which
     is presized to the combined number of elements.

     NOTE(review): presumably the shared dictionary is what maps equal
     elements of both collections to the same 'equivalence number' - the
     method doing that is not part of this excerpt, please confirm."

    | sharedTable |

    sharedTable := Dictionary new: gA size + gB size.
    (Array with: gA with: gB) keysAndValuesDo: [:slot :elements |
        | data |

        data := Data new.
        data fileData.
        data
            fileData: elements
            hashTable: sharedTable
            felDiff: self.
        self filevec at: slot put: data
    ].

    "Modified: / 12-02-2010 / 14:22:56 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
321 |
! |
|
322 |
||
323 |
diff
    "Answer the edit script obtained from #diff: with the reverse flag
     switched off."

    ^ self diff: false

    "Created: / 16-02-2010 / 22:50:26 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
328 |
! |
|
329 |
||
330 |
diff:reverse
    "Answer the edit script for the previously prepared input. When the
     boolean reverse is true the script is built by a ReverseScript,
     otherwise by a ForwardScript."

    | builder |

    builder := reverse
                ifTrue: [ReverseScript new]
                ifFalse: [ForwardScript new].
    ^ self diffUsingScript: builder

    "Modified: / 16-02-2010 / 22:51:43 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
337 |
! |
|
338 |
||
339 |
diffUsingScript:bld
    "Run the comparison and answer its result as built by bld.

     bld must understand #buildScript:length0:changed1:length1: ; it is
     given the changed-flag vectors and the line counts of both inputs and
     whatever it answers is answered from here (for the script classes
     used by #diff: that is the head of a list of changes).

     Steps:
       1. lines that obviously match nothing are discarded up front
          (#discardConfusingLines), so the main algorithm never looks
          at them;
       2. the remaining (undiscarded) lines are compared by
          #compareseq:xlim:yoff:ylim: using the two diagonal work
          vectors allocated here;
       3. the work vectors are dropped again and #shiftBoundaries
          post-processes the change flags;
       4. bld turns the change flags into the final script."

    | oldData newData oldCount newCount diagCount |

    self discardConfusingLines.

    oldData := filevec at: 1.
    newData := filevec at: 2.
    xvec := oldData undiscarded.
    yvec := newData undiscarded.
    oldCount := oldData nondiscardedLines.
    newCount := newData nondiscardedLines.

    "Work vectors: one slot per possible diagonal plus some slack; the
     offset shifts (possibly negative) diagonal numbers into the valid
     index range."
    diagCount := oldCount + newCount + 3.
    fdiag := Array new: diagCount withAll: 0.
    fdiagoff := newCount + 1.
    bdiag := Array new: diagCount withAll: 0.
    bdiagoff := newCount + 1.

    self
        compareseq: 0
        xlim: oldCount
        yoff: 0
        ylim: newCount.

    "The work vectors are only needed while comparing."
    fdiag := nil.
    bdiag := nil.

    self shiftBoundaries.

    ^ bld
        buildScript: oldData changedFlag
        length0: oldData bufferedLines
        changed1: newData changedFlag
        length1: newData bufferedLines.

    "Modified: / 12-02-2010 / 13:57:09 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
381 |
! ! |
|
382 |
||
383 |
!Diff methodsFor:'initialization'! |
|
384 |
||
385 |
initialize
    "Put the receiver into its initial state (acts as the constructor).
     The instance variable cost is deliberately left alone here; it is
     assigned by #diag:xlim:yoff:ylim: before it is read."

    "tuning flags and limits"
    heuristic := false.
    nodiscards := false.
    inhibit := false.
    snakeLimit := 20.
    equivMax := 1.

    "working vectors - empty until a comparison is started"
    xvec := #().
    yvec := #().
    fdiag := #().
    bdiag := #().

    "one slot for each of the two inputs"
    filevec := Array new: 2.

    "Modified: / 16-02-2010 / 22:51:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
400 |
! ! |
|
401 |
||
402 |
!Diff methodsFor:'private'! |
|
403 |
||
404 |
compareseq:gXoff xlim:gXlim yoff:gYoff ylim:gYlim
    "Compare in detail the range [gXoff, gXlim) of xvec with the range
     [gYoff, gYlim) of yvec. All positions are origin-0, the upper bounds
     are exclusive and discarded lines are not counted.

     Nothing useful is answered; the outcome is recorded by storing true
     into the changedFlag vector of the first or second file data for
     every line found to be a deletion or an insertion. The position
     stored is looked up in the realindexes table and shifted by 2
     (origin-1 indexing plus the extra leading element of changedFlag)."

    | lowX highX lowY highY flags realLines splitDiag splitCost forwardReach backwardReach |

    lowX := gXoff.
    highX := gXlim.
    lowY := gYoff.
    highY := gYlim.

    "Skip the part both ranges have in common at their beginning ..."
    [lowX < highX and: [lowY < highY and: [(xvec at: lowX + 1) = (yvec at: lowY + 1)]]]
        whileTrue: [
            lowX := lowX + 1.
            lowY := lowY + 1
        ].

    "... and likewise at their end."
    [highX > lowX and: [highY > lowY and: [(xvec at: highX) = (yvec at: highY)]]]
        whileTrue: [
            highX := highX - 1.
            highY := highY - 1
        ].

    "Simple case 1: the x range is used up, so the rest of y is all changed."
    lowX = highX ifTrue: [
        flags := (filevec at: 2) changedFlag.
        realLines := (filevec at: 2) realindexes.
        lowY to: highY - 1 do: [:pos |
            flags at: 2 + (realLines at: pos + 1) put: true
        ].
        ^ self
    ].

    "Simple case 2: the y range is used up, so the rest of x is all changed."
    lowY = highY ifTrue: [
        flags := (filevec at: 1) changedFlag.
        realLines := (filevec at: 1) realindexes.
        lowX to: highX - 1 do: [:pos |
            flags at: 2 + (realLines at: pos + 1) put: true
        ].
        ^ self
    ].

    "General case: find a point of correspondence in the middle.
     cost and the diagonal vectors are overwritten by the recursive calls
     below, so everything needed is fetched right away."
    splitDiag := self diag: lowX xlim: highX yoff: lowY ylim: highY.
    splitCost := cost.
    forwardReach := fdiag at: fdiagoff + splitDiag + 1.
    backwardReach := bdiag at: bdiagoff + splitDiag + 1.

    splitCost = 1 ifTrue: [
        "This should be impossible, because it implies that one of the
         two subsequences is empty, and that case was handled above
         without calling `diag'. Signal an exception if it happens anyway."
        Exception new raiseSignal.
        ^ self
    ].

    "Use that point to split the problem into two subproblems.
     The split is taken from the backward vector; using the forward one
     is incorrect, since diagonal splitDiag does not necessarily have a
     snake between the two."
    self
        compareseq: lowX
        xlim: backwardReach
        yoff: lowY
        ylim: backwardReach - splitDiag.
    self
        compareseq: backwardReach
        xlim: highX
        yoff: backwardReach - splitDiag
        ylim: highY.
|
486 |
! |
|
487 |
||
488 |
diag: anXoff xlim: anXlim yoff: aYoff ylim: aYlim
    "Find a diagonal on which the range [anXoff, anXlim) of xvec and the
     range [aYoff, aYlim) of yvec can be split, and answer its number
     (diagonal d holds the points with x - y = d).

     Two searches advance in lock step: a top-down one starting at
     (anXoff, aYoff) and a bottom-up one starting at (anXlim, aYlim).
     For every diagonal the x position reached so far is kept in
     fdiag (top-down) resp. bdiag (bottom-up); fdiagoff / bdiagoff shift
     the diagonal number into the valid index range and a further + 1
     accounts for origin-1 arrays. As soon as both searches overlap on
     one diagonal, that diagonal is answered.

     Side effect: the instance variable cost is set before answering
     (2*c - 1 or 2*c, c being the number of rounds performed).

     If the heuristic flag is set, the search may end early: after more
     than 200 rounds, and once a run of matches longer than snakeLimit
     has been seen in the current round, the diagonal that made the most
     progress is answered as if the search had succeeded."

    |fd bd xv yv dmin dmax fmid fmax bmid bmax fmin bmin odd c cont d bigsnake tlo thi x oldx y best bestpos dd v temp k cont2|

    fd := fdiag.                "local aliases of the work vectors"
    bd := bdiag.
    xv := xvec.
    yv := yvec.
    dmin := anXoff - aYlim.     "Minimum valid diagonal."
    dmax := anXlim - aYoff.     "Maximum valid diagonal."
    fmid := anXoff - aYoff.     "Center diagonal of top-down search."
    bmid := anXlim - aYlim.     "Center diagonal of bottom-up search."
    fmin := fmid.               "Limits of top-down search."
    fmax := fmid.
    bmin := bmid.               "Limits of bottom-up search."
    bmax := bmid.

    "True if southeast corner is on an odd diagonal with respect to the northwest."
    odd := (fmid - bmid) odd.

    "Added + 1 to all arrays since StX uses index 1 as first"
    fd at: (fdiagoff + fmid + 1) put: anXoff.
    bd at: (bdiagoff + bmid + 1) put: anXlim.

    c := 1.
    cont := true.
    [cont = true] whileTrue: [
        d := nil.               "Active diagonal."
        bigsnake := false.

        "Extend the top-down search by an edit step in each diagonal."
        (fmin > dmin) ifTrue: [
            fmin := fmin - 1.
            fd at: (fdiagoff + fmin - 1 + 1) put: -1.
        ] ifFalse: [
            fmin := fmin + 1.
        ].
        (fmax < dmax) ifTrue: [
            fmax := fmax + 1.
            fd at: (fdiagoff + fmax + 1 + 1) put: -1.
        ] ifFalse: [
            fmax := fmax - 1.
        ].

        d := fmax.
        [d >= fmin] whileTrue: [
            tlo := fd at: (fdiagoff + d - 1 + 1).
            thi := fd at: (fdiagoff + d + 1 + 1).
            (tlo >= thi) ifTrue: [
                x := tlo + 1.
            ] ifFalse: [
                x := thi.
            ].
            oldx := x.
            y := x - d.
            "follow the run of matching elements downwards"
            [(x < anXlim) and: [(y < aYlim) and: [(xv at: (x + 1)) = (yv at: (y + 1))]]] whileTrue: [
                x := x + 1.
                y := y + 1.
            ].
            ((x - oldx) > snakeLimit) ifTrue: [
                bigsnake := true.
            ].
            fd at: (fdiagoff + d + 1) put: x.
            (odd and: [bmin <= d and: [d <= bmax and: [(bd at: (bdiagoff + d + 1)) <= (fd at: (fdiagoff + d + 1))]]]) ifTrue: [
                cost := (2 * c) - 1.
                ^ d.
            ] ifFalse: [
                d := d - 2.
            ].
        ].

        "Similar extend the bottom-up search."
        (bmin > dmin) ifTrue: [
            bmin := bmin - 1.
            bd at: (bdiagoff + bmin - 1 + 1) put: 2147483647.
        ] ifFalse: [
            bmin := bmin + 1.
        ].
        (bmax < dmax) ifTrue: [
            bmax := bmax + 1.
            bd at: (bdiagoff + bmax + 1 + 1) put: 2147483647.
        ] ifFalse: [
            bmax := bmax - 1.
        ].

        d := bmax.
        [d >= bmin] whileTrue: [
            tlo := bd at: (bdiagoff + d - 1 + 1).
            thi := bd at: (bdiagoff + d + 1 + 1).
            (tlo < thi) ifTrue: [
                x := tlo.
            ] ifFalse: [
                x := thi - 1.
            ].
            oldx := x.
            y := x - d.
            "follow the run of matching elements upwards"
            [(x > anXoff) and: [(y > aYoff) and: [(xv at: (x - 1 + 1)) = (yv at: (y - 1 + 1))]]] whileTrue: [
                x := x - 1.
                y := y - 1.
            ].
            "x only decreases in this direction, so the length of the run
             is oldx - x. (The former test x - oldx could never exceed
             snakeLimit, i.e. bottom-up runs never counted as big snakes.)"
            ((oldx - x) > snakeLimit) ifTrue: [
                bigsnake := true.
            ].
            bd at: (bdiagoff + d + 1) put: x.
            ((odd = false) and: [fmin <= d and: [d <= fmax and: [(bd at: (bdiagoff + d + 1)) <= (fd at: (fdiagoff + d + 1))]]]) ifTrue: [
                cost := (2 * c).
                ^ d.
            ] ifFalse: [
                d := d - 2.
            ].
        ].

        "Heuristic: check occasionally for a diagonal that has made
         lots of progress compared with the edit distance.
         If we have any such, find the one that has made the most
         progress and return it as if it had succeeded.

         With this heuristic, for files with a constant small density
         of changes, the algorithm is linear in the file size."
        ((c > 200) and: [bigsnake and: [heuristic]]) ifTrue: [
            best := 0.
            bestpos := -1.
            d := fmax.
            [d >= fmin] whileTrue: [
                dd := d - fmid.
                x := fd at: (fdiagoff + d + 1).
                y := x - d.
                v := ((x - anXoff) * 2) - dd.
                temp := ((dd abs) + c) * 12.
                (v > temp) ifTrue: [
                    ((v > best) and: [(anXoff + snakeLimit <= x) and: [(x < anXlim) and: [(aYoff + snakeLimit <= y) and: [(y < aYlim)]]]]) ifTrue: [
                        "We have a good enough best diagonal;
                         now insist that it end with a significant snake."
                        k := 1.
                        cont2 := true.
                        [(xvec at: (x - k + 1)) = (yvec at: (y - k + 1)) and: [cont2]] whileTrue: [
                            (k = snakeLimit) ifTrue: [
                                best := v.
                                bestpos := d.
                                cont2 := false.
                            ] ifFalse: [
                                k := k + 1.
                            ].
                        ].
                    ].
                ].
                d := d - 2.
            ].
            (best > 0) ifTrue: [
                cost := (2 * c) - 1.
                ^ bestpos.
            ].

            best := 0.
            d := bmax.
            [d >= bmin] whileTrue: [
                dd := d - bmid.
                x := bd at: (bdiagoff + d + 1).
                y := x - d.
                v := ((anXlim - x) * 2) + dd.
                temp := ((dd abs) + c) * 12.
                (v > temp) ifTrue: [
                    ((v > best) and: [(anXoff < x) and: [(x <= (anXlim - snakeLimit)) and: [(aYoff < y) and: [(y <= (aYlim - snakeLimit))]]]]) ifTrue: [
                        "We have a good enough best diagonal;
                         now insist that it end with a significant snake."
                        k := 0.
                        cont2 := true.
                        [((xvec at: (x + k + 1)) = (yvec at: (y + k + 1))) and: [cont2]] whileTrue: [
                            (k = snakeLimit) ifTrue: [
                                best := v.
                                bestpos := d.
                                cont2 := false.
                            ] ifFalse: [
                                k := k + 1.
                            ].
                        ].
                    ].
                ].
                d := d - 2.
            ].
            (best > 0) ifTrue: [
                cost := (2 * c) - 1.
                ^ bestpos.
            ].
        ].
        c := c + 1.
    ]
|
652 |
! |
|
653 |
||
654 |
discardConfusingLines
    "Let each of the two file data objects discard its confusing lines
     with respect to the other one (first against second, then second
     against first). The receiver is passed along as the felDiff argument."

    | oldData newData |

    oldData := filevec at: 1.
    newData := filevec at: 2.

    oldData discardConfusingLines: newData felDiff: self.
    newData discardConfusingLines: oldData felDiff: self.
|
663 |
! |
|
664 |
||
665 |
equivMax
    "Answer the value of the equivMax instance variable (1 right after
     initialization)."

    ^ equivMax
|
667 |
! |
|
668 |
||
669 |
equivMax:something
    "Store something into the equivMax instance variable."

    equivMax := something.
|
671 |
! |
|
672 |
||
673 |
filevec
    "Answer the two-element array that holds the file data objects of
     both inputs."

    ^ filevec
|
675 |
! |
|
676 |
||
677 |
nodiscards
    "Answer the nodiscards flag (false right after initialization).
     When it is true, Data>>discard:felDiff: keeps every line."

    ^ nodiscards
|
679 |
! |
|
680 |
||
681 |
nodiscards:something
    "Store something into the nodiscards flag."

    nodiscards := something.
|
683 |
! |
|
684 |
||
685 |
shiftBoundaries
    "Post-process the change flags: adjust inserts/deletes so that
     changes are joined as much as possible. The work itself is done by
     the two file data objects, each one against the other.

     Answers nil, without doing anything, if the inhibit flag is set."

    | oldData newData |

    inhibit ifTrue: [^ nil].

    oldData := filevec at: 1.
    newData := filevec at: 2.
    oldData shiftBoundaries: newData.
    newData shiftBoundaries: oldData.
|
698 |
! ! |
|
699 |
||
700 |
!Diff::Change class methodsFor:'documentation'! |
|
701 |
||
702 |
documentation |
|
703 |
" |
|
704 |
The result of comparison is an ""edit script"": a chain of change objects. |
|
705 |
Each change represents one place where some lines are deleted |
|
706 |
and some are inserted. |
|
707 |
||
708 |
LINE0 and LINE1 are the first affected lines in the two files (origin 0). |
|
709 |
DELETED is the number of lines deleted here from file 0. |
|
710 |
INSERTED is the number of lines inserted here in file 1. |
|
711 |
||
712 |
If DELETED is 0 then LINE0 is the number of the line before |
|
713 |
which the insertion was done; vice versa for INSERTED and LINE1. |
|
714 |
" |
|
715 |
! ! |
|
716 |
||
717 |
!Diff::Change methodsFor:'accessing'! |
|
718 |
||
719 |
deleted
    "Answer the number of lines deleted here from file 0."

    ^ deleted
|
722 |
! |
|
723 |
||
724 |
inserted
    "Answer the number of lines inserted here in file 1."

    ^ inserted
|
727 |
! |
|
728 |
||
729 |
line0
    "Answer the number (origin 0) of the first affected line in file 0."

    ^ line0
|
732 |
! |
|
733 |
||
734 |
line1
    "Answer the number (origin 0) of the first affected line in file 1."

    ^ line1
|
737 |
! ! |
|
738 |
||
739 |
!Diff::Change methodsFor:'enumerating'! |
|
740 |
||
741 |
do: aBlock
    "Evaluate aBlock for the receiver and then for every change that
     follows it in the chain, in link order."

    | current |

    current := self.
    [current isNil] whileFalse: [
        aBlock value: current.
        current := current nextLink
    ].

    "Created: / 16-02-2010 / 22:53:40 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
750 |
! ! |
|
751 |
||
752 |
!Diff::Change methodsFor:'instance creation'! |
|
753 |
||
754 |
newLine0:aLine0 line1:aLine1 deleted:aDeleted inserted:aInserted next: nextChange
    "Fill in the receiver so that it becomes the new head of the edit
     script nextChange.

     aLine0 / aLine1  first affected lines in the two files (origin 0)
     aDeleted         number of lines deleted here from file 0
     aInserted        number of lines inserted here in file 1
     nextChange       the rest of the script (stored as the next link)

     If aDeleted is 0 then aLine0 is the number of the line before
     which the insertion was done; vice versa for aInserted and aLine1."

    nextLink := nextChange.
    inserted := aInserted.
    deleted := aDeleted.
    line1 := aLine1.
    line0 := aLine0.

    "Modified: / 12-02-2010 / 13:42:30 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
770 |
! ! |
|
771 |
||
772 |
!Diff::Data methodsFor:'accessing'! |
|
773 |
||
774 |
bufferedLines
    "Answer the number of lines (elements) held by the receiver."

    ^ bufferedLines
|
776 |
! |
|
777 |
||
778 |
changedFlag
    "Answer the array of change flags; #clear allocates it with one
     flag per line plus an extra element at each end."

    ^ changedFlag
|
780 |
! |
|
781 |
||
782 |
nondiscardedLines
    "Answer the number of lines kept (not discarded) by #discard:felDiff:."

    ^ nondiscardedLines
|
784 |
! |
|
785 |
||
786 |
realindexes
    "Answer the table that maps the position of a kept line to its
     real (origin-0) line number."

    ^ realindexes
|
788 |
! |
|
789 |
||
790 |
undiscarded
    "Answer the vector holding the equivalence numbers of the kept
     (not discarded) lines."

    ^ undiscarded
|
792 |
! ! |
|
793 |
||
794 |
!Diff::Data methodsFor:'default'! |
|
795 |
||
796 |
clear
    "Allocate a fresh changedFlag array for the results of a comparison:
     one flag per line, saying whether that line is an insertion or
     deletion, plus one extra element at each end. All flags start
     out false."

    | flagCount |

    flagCount := bufferedLines + 2.
    changedFlag := Array new: flagCount withAll: false

    "Modified: / 12-02-2010 / 13:55:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
805 |
! |
|
806 |
||
807 |
discard:discards felDiff:fellDiffClass
    "Actually discard the lines.

     discards        one entry per line; 0 means the line is kept
     fellDiffClass   the Diff instance; if its nodiscards flag is set,
                     every line is kept regardless of discards

     Kept lines are copied densely into undiscarded (their equivalence
     number) and realindexes (their origin-0 line number). Discarded
     lines are marked as changed instead. nondiscardedLines is updated
     after every line and ends up as the number of kept lines."

    | kept |

    kept := 0.
    1 to: bufferedLines do: [:lineNo |
        (fellDiffClass nodiscards or: [(discards at: lineNo) = 0])
            ifTrue: [
                kept := kept + 1.
                undiscarded at: kept put: (equivs at: lineNo).
                realindexes at: kept put: lineNo - 1.
            ]
            ifFalse: [
                "changedFlag has an extra leading element, hence + 1"
                changedFlag at: lineNo + 1 put: true.
            ].
        nondiscardedLines := kept.
    ].
|
825 |
! |
|
826 |
||
827 |
discardConfusingLines: f felDiff: felDiff
    "Discard lines that have no matches in the other file f.

     A discarded line is not considered by the actual comparison
     algorithm; it is as if that line were not in the file. The
     realindexes table maps virtual line numbers (which do not count
     discarded lines) into real line numbers, so that the results stay
     comprehensible when the discarded lines are counted again.

     A line that gets discarded is also marked as a deletion or
     insertion, so that it shows up in the output.

     f        the other file's data
     felDiff  the Diff instance driving the comparison"

    | verdicts |

    "start out with fresh change flags"
    self clear.

    "1. decide for every line whether it is going to be discarded"
    verdicts := self discardable: (f equivCount: felDiff).

    "2. provisional lines are only really discarded when they occur in a
        run of discardables with nonprovisionals at the beginning and end"
    self filterDiscards: verdicts.

    "3. actually discard the lines"
    self discard: verdicts felDiff: felDiff.
|
855 |
! |
|
856 |
||
857 |
discardable: counts
    "Classify every line of the receiver and answer the result as an
     array with one entry per line:
        0  not discardable
        1  discardable            (matches no line of the other file)
        2  provisionally discardable (matches more lines than the threshold)

     counts holds, for each equivalence number, how often it occurs in
     the other file; it is indexed by equivalence number + 1
     (see #equivCount:). Lines with equivalence number 0 always stay 0."

    | lineCount verdicts threshold scale |

    lineCount := bufferedLines.
    verdicts := Array new: lineCount withAll: 0.

    "The threshold for provisionally discardable lines is 5 multiplied
     by the approximate square root of the number of lines: it doubles
     for every factor of 4 in lineCount / 64."
    threshold := 5.
    scale := (lineCount // 64) >> 2.
    [scale > 0] whileTrue: [
        threshold := threshold * 2.
        scale := scale >> 2
    ].

    1 to: lineCount do: [:lineNo |
        | equiv matches |

        equiv := equivs at: lineNo.
        equiv = 0 ifFalse: [
            matches := counts at: equiv + 1.
            matches = 0
                ifTrue: [verdicts at: lineNo put: 1]
                ifFalse: [
                    matches > threshold ifTrue: [verdicts at: lineNo put: 2]
                ]
        ]
    ].

    ^ verdicts
|
894 |
! |
|
895 |
||
896 |
equivCount: felDiff
    "Answer how many buffered lines of this file carry each equivalence
     number.

     Equivalence numbers are read from `equivs' and are 0-based, while
     Array slots are 1-based; the tally for equivalence number e is
     therefore kept in slot e + 1 of the result.

     @param felDiff  answers #equivMax, which fixes the size of the result
     @return an Array of (felDiff equivMax) line counts"

    | tally |

    "#new:withAll: already fills every slot with 0,
     so no separate clearing pass is required."
    tally := Array new: felDiff equivMax withAll: 0.

    1 to: bufferedLines do:[:lineIndex |
        | slot |

        slot := (equivs at: lineIndex) + 1.
        tally at: slot put: (tally at: slot) + 1.
    ].

    ^ tally

    "Modified: / 12-02-2010 / 13:56:10 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
918 |
||
919 |
fileData
    "Constructor: set this instance up as an empty file.
     The per-line tables become empty literal arrays and the number of
     nondiscarded lines becomes zero."

    nondiscardedLines := 0.
    changedFlag := #().
    realindexes := #().
    undiscarded := #().
    equivs := #().
!
927 |
||
928 |
fileData: data hashTable: h felDiff:fellDiffClass
    "Constructor: set this instance up for the lines in data and assign an
     equivalence number to each of them.

     Lines already present as keys in h get the number stored there.  A
     line seen for the first time gets the current (fellDiffClass equivMax),
     which is then incremented, and the line is recorded in h under that
     number.  The `undiscarded' and `realindexes' tables are only allocated
     (zero-filled) here.

     @param data           indexable collection of lines (1 .. data size)
     @param h              dictionary mapping a line to its equivalence number;
                           updated in place
     @param fellDiffClass  answers #equivMax and accepts #equivMax:"

    | lineCount |

    lineCount := data size.
    bufferedLines := lineCount.
    equivs := Array new: lineCount withAll: 0.
    undiscarded := Array new: lineCount withAll: 0.
    realindexes := Array new: lineCount withAll: 0.

    1 to: lineCount do:[:lineIndex |
        | line known fresh |

        line := data at: lineIndex.
        known := h at: line ifAbsent: nil.
        known isNil
            ifTrue:[
                "First occurrence of this line: open a new equivalence class."
                fresh := fellDiffClass equivMax.
                equivs at: lineIndex put: fresh.
                fellDiffClass equivMax: fresh + 1.
                h at: line put: fresh
            ]
            ifFalse:[ equivs at: lineIndex put: known ]
    ].

    "Modified: / 12-02-2010 / 13:56:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
951 |
||
952 |
filterDiscards:discards
    "Decide which provisional discards may stand; discards is updated
     in place.

     Each entry of discards is 0 (keep the line), 1 (discard it) or
     2 (provisionally discardable); a nil entry found at the scan position
     is first replaced by 0.  Provisional lines are not really discarded
     except when they occur in a run of discardables, with nonprovisionals
     at the beginning and end.  Provisional flags that do not qualify are
     reset to 0; those that qualify keep the value 2.

     i and j are 0-based line offsets, which is why every Array access
     adds 1.

     @param discards  Array with one flag per buffered line"

    |end i j length provisional bool consec minimum tem|

    end := bufferedLines.
    i := 0.
    [ i < end ] whileTrue:[
        "Treat a missing flag as `keep'."
        (discards at:i + 1) isNil ifTrue:[
            discards at:i + 1 put:0
        ].
        (discards at:i + 1) = 2 ifTrue:[
            "Cancel provisional discards not in middle of run of discards."
            discards at:i + 1 put:0
        ] ifFalse:[
            (discards at:i + 1) = 0 ifFalse:[
                "We have found a nonprovisional discard."
                provisional := 0.
                j := i.
                bool := true.
                "Find end of this run of discardable lines.
                 Count how many are provisionally discardable."
                [ bool and:[ j < end ] ] whileTrue:[
                    (discards at:j + 1) = 2 ifTrue:[
                        provisional := provisional + 1
                    ].
                    (discards at:j + 1) = 0 ifTrue:[
                        bool := false
                    ] ifFalse:[
                        j := j + 1
                    ]
                ].

                "Cancel provisional discards at end, and shrink the run."
                [
                    j > i and:[ (discards at:j - 1 + 1) = 2 ]
                ] whileTrue:[
                    j := j - 1.
                    discards at:j + 1 put:0.
                    provisional := provisional - 1
                ].

                "Now we have the length of a run of discardable lines
                 whose first and last are not provisional."
                length := j - i.

                (provisional * 4 > length) ifTrue:[
                    "More than a quarter of the run is provisional:
                     cancel every provisional discard in the run."
                    [ j > i ] whileTrue:[
                        j := j - 1.
                        (discards at:j + 1) = 2 ifTrue:[
                            discards at:j + 1 put:0
                        ]
                    ]
                ] ifFalse:[
                    "MINIMUM is approximate square root of LENGTH/4.
                     A subrun of two or more provisionals can stand
                     when LENGTH is at least 16.
                     A subrun of 4 or more can stand when LENGTH >= 64."
                    minimum := 1.
                    tem := (length / 4) asInteger.
                    tem := tem >> 2.
                    [ tem > 0 ] whileTrue:[
                        minimum := minimum * 2.
                        tem := tem >> 2
                    ].
                    minimum := minimum + 1.

                    "Cancel any subrun of MINIMUM or more provisionals
                     within the larger run."
                    j := 0.
                    consec := 0.
                    [ j < length ] whileTrue:[
                        (discards at:i + j + 1) ~= 2 ifTrue:[
                            consec := 0
                        ] ifFalse:[
                            consec := consec + 1.
                            minimum = consec ifTrue:[
                                "Back up to start of subrun, to cancel it all.
                                 consec is deliberately not reset, so the
                                 rescan finds minimum < consec for every
                                 line of the subrun."
                                j := j - consec
                            ] ifFalse:[
                                "Only cancel once the subrun is known to be
                                 long enough.  Without this guard every
                                 provisional flag would be cancelled, even
                                 in subruns shorter than MINIMUM."
                                minimum < consec ifTrue:[
                                    discards at:i + j + 1 put:0
                                ]
                            ]
                        ].
                        j := j + 1
                    ].

                    "Scan from beginning of run
                     until we find 3 or more nonprovisionals in a row
                     or until the first nonprovisional at least 8 lines in.
                     Until that point, cancel any provisionals."
                    j := 0.
                    consec := 0.
                    bool := true.
                    [
                        bool and:[ j < length ]
                    ] whileTrue:[
                        (j >= 8 and:[ (discards at:i + j + 1) = 1 ]) ifTrue:[
                            bool := false
                        ] ifFalse:[
                            (discards at:i + j + 1) = 2 ifTrue:[
                                consec := 0.
                                discards at:i + j + 1 put:0
                            ] ifFalse:[
                                (discards at:i + j + 1) = 0 ifTrue:[
                                    consec := 0
                                ] ifFalse:[
                                    consec := consec + 1
                                ]
                            ]
                        ].
                        (consec = 3) ifTrue:[
                            bool := false
                        ].
                        j := j + 1
                    ].

                    "I advances to the last line of the run."
                    i := i + length - 1.

                    "Same thing, from end."
                    bool := true.
                    j := 0.
                    consec := 0.
                    [
                        bool and:[ j < length ]
                    ] whileTrue:[
                        (j >= 8 and:[ (discards at:i - j + 1) = 1 ]) ifTrue:[
                            bool := false
                        ] ifFalse:[
                            (discards at:i - j + 1) = 2 ifTrue:[
                                consec := 0.
                                discards at:i - j + 1 put:0
                            ] ifFalse:[
                                (discards at:i - j + 1) = 0 ifTrue:[
                                    consec := 0
                                ] ifFalse:[
                                    consec := consec + 1
                                ]
                            ]
                        ].
                        (consec = 3) ifTrue:[
                            bool := false
                        ].
                        j := j + 1
                    ]
                ]
            ]
        ].
        i := i + 1.
    ]
!
|
1096 |
||
1097 |
shiftBoundaries:f
    "Adjust inserts/deletes of blank lines to join changes as much as
     possible.

     Something is done when a run of changed lines includes a blank line
     at one end and has an excluded blank line at the other; we are then
     free to choose which blank line is included.  `compareseq' always
     chooses the one at the beginning, but usually it is cleaner to
     consider the following blank line to be the change.  The only
     exception is if the preceding blank line would join this change to
     other changes.

     Line positions are 0-based offsets; the changed flag of the line at
     offset k is kept in slot k + 2 of the flag arrays, and its
     equivalence number in slot k + 1 of `equivs'.

     @param f  the file being compared against (asked for #changedFlag)"

    |mine theirs here there limit prevEnd otherPrevEnd runStart runEnd otherRunStart|

    mine := changedFlag.
    theirs := f changedFlag.
    here := 0.
    there := 0.
    limit := bufferedLines.
    prevEnd := -1.
    otherPrevEnd := -1.

    [
        "Scan forwards to find beginning of another run of changes.
         Also keep track of the corresponding point in the other file."
        [ here < limit and:[ (mine at:here + 2) = false ] ] whileTrue:[
            "Non-corresponding lines in the other file
             will count as the preceding batch of changes."
            [ theirs at:there + 2 ] whileTrue:[
                there := there + 1.
                otherPrevEnd := there.
            ].
            there := there + 1.
            here := here + 1.
        ].
        "Carry on only while a further run of changes was found."
        here < limit
    ] whileTrue:[
        runStart := here.
        otherRunStart := there.

        [
            "Find the end of this run of changes."
            [ here < limit and:[ mine at:here + 2 ] ] whileTrue:[
                here := here + 1
            ].
            runEnd := here.

            "If the first changed line matches the following unchanged one,
             and this run does not follow right after a previous run,
             and there are no lines deleted from the other file here,
             then the run can be shifted down by one line.
             (A run can only follow right after another one because the
             previous run was shifted here.)"
            runEnd ~= limit
                and:[ (equivs at:runStart + 1) = (equivs at:runEnd + 1)
                and:[ (theirs at:there + 2) = false
                and:[ ((prevEnd >= 0 and:[ runStart = prevEnd ])
                        or:[ otherPrevEnd >= 0 and:[ otherRunStart = otherPrevEnd ] ]) = false ] ] ]
        ] whileTrue:[
            "Classify the first changed line as unchanged
             and the following line as changed in its place."
            mine at:runEnd + 2 put:true.
            runEnd := runEnd + 1.
            mine at:runStart + 2 put:false.
            runStart := runStart + 1.
            "Since one line-that-matches is now before this run
             instead of after, we must advance in the other file
             to keep in synch."
            here := here + 1.
            there := there + 1.
        ].

        prevEnd := here.
        otherPrevEnd := there.
    ].
! !
|
1179 |
||
1180 |
!Diff::ForwardScript methodsFor:'default'! |
|
1181 |
||
1182 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
    "Scan the tables of which lines are inserted and deleted,
     producing an edit script in forward order.

     Both tables are walked from the end towards the start and every
     change found is linked in front of the script built so far, so the
     first change of the files ends up at the head of the chain.
     The flag for position p is read from slot p + 1 of its table.

     @param aChanged0  changed flags of the first file
     @param aLen0      position in aChanged0 at which the scan starts
     @param aChanged1  changed flags of the second file
     @param aLen1      position in aChanged1 at which the scan starts
     @return the head of the Diff::Change chain, or nil if nothing changed"

    |chain pos0 pos1 top0 top1|

    chain := nil.
    pos0 := aLen0.
    pos1 := aLen1.
    [ pos0 < 0 and:[ pos1 < 0 ] ] whileFalse:[
        ((aChanged0 at:pos0 + 1) or:[ aChanged1 at:pos1 + 1 ]) ifTrue:[
            top0 := pos0.
            top1 := pos1.
            "Find # lines changed here in each file."
            [ aChanged0 at:pos0 + 1 ] whileTrue:[ pos0 := pos0 - 1 ].
            [ aChanged1 at:pos1 + 1 ] whileTrue:[ pos1 := pos1 - 1 ].
            "Record this change."
            chain := Diff::Change new
                        newLine0:pos0
                        line1:pos1
                        deleted:top0 - pos0
                        inserted:top1 - pos1
                        next:chain.
        ].
        "We have reached lines in the two files that match each other."
        pos0 := pos0 - 1.
        pos1 := pos1 - 1.
    ].
    ^ chain.

    "Modified: / 16-02-2010 / 22:49:18 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
|
1212 |
||
1213 |
!Diff::ReverseScript methodsFor:'default'! |
|
1214 |
||
1215 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
    "Scan the tables of which lines are inserted and deleted,
     producing an edit script in reverse order.

     Both tables are walked from the start towards the end and every
     change found is linked in front of the script built so far, so the
     last change of the files ends up at the head of the chain.
     The flag of the line at 0-based offset k is read from slot k + 2
     of its table.

     @param aChanged0  changed flags of the first file
     @param aLen0      number of line offsets to scan in the first file
     @param aChanged1  changed flags of the second file
     @param aLen1      number of line offsets to scan in the second file
     @return the head of the Diff::Change chain, or nil if nothing changed"

    |chain pos0 pos1 from0 from1|

    chain := nil.
    pos0 := 0.
    pos1 := 0.
    [ pos0 >= aLen0 and:[ pos1 >= aLen1 ] ] whileFalse:[
        ((aChanged0 at:(1 + pos0 + 1)) or:[ aChanged1 at:(1 + pos1 + 1) ]) ifTrue:[
            from0 := pos0.
            from1 := pos1.
            "Find # lines changed here in each file."
            [ aChanged0 at:(1 + pos0 + 1) ] whileTrue:[ pos0 := pos0 + 1 ].
            [ aChanged1 at:(1 + pos1 + 1) ] whileTrue:[ pos1 := pos1 + 1 ].
            "Record this change."
            chain := Diff::Change new
                        newLine0:from0
                        line1:from1
                        deleted:(pos0 - from0)
                        inserted:(pos1 - from1)
                        next:chain.
        ].
        "We have reached lines in the two files that match each other."
        pos0 := pos0 + 1.
        pos1 := pos1 + 1.
    ].
    ^ chain.

    "Modified: / 12-02-2010 / 14:15:27 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
|
1245 |
||
1246 |
!Diff class methodsFor:'documentation'! |
|
1247 |
||
1248 |
version_CVS
    "Answer the CVS version keyword string of this source file."

    ^ '$Header$'
!
1251 |
||
12431
9f0c59c742d5
Added LintRuleSettingsApplication and LintRuleEditDialog to define user-defined rule sets.
Jan Vrany <jan.vrany@fit.cvut.cz>
parents:
12401
diff
changeset
|
1252 |
version_HG
    "Answer the Mercurial changeset keyword string of this source file."

    ^ '$Changeset: <not expanded> $'
!
9f0c59c742d5
Added LintRuleSettingsApplication and LintRuleEditDialog to define user-defined rule sets.
Jan Vrany <jan.vrany@fit.cvut.cz>
parents:
12401
diff
changeset
|
1256 |
|
10014 | 1257 |
version_SVN
    "Answer the Subversion version keyword string of this source file."

    ^ '$Id$'
! !
12692 | 1260 |