author | Jan Vrany <jan.vrany@fit.cvut.cz> |
Fri, 07 Sep 2012 18:28:09 +0100 | |
branch | jv |
changeset 12287 | 400a99059170 |
parent 12128 | a7ff7d66ee85 |
child 12401 | 4714b9640528 |
permissions | -rw-r--r-- |
10014 | 1 |
" |
10089 | 2 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague |
3 |
Copyright (c) 2009-2010 eXept Software AG |
|
10014 | 4 |
|
10089 | 5 |
Permission is hereby granted, free of charge, to any person |
6 |
obtaining a copy of this software and associated documentation |
|
7 |
files (the 'Software'), to deal in the Software without |
|
8 |
restriction, including without limitation the rights to use, |
|
9 |
copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
10 |
copies of the Software, and to permit persons to whom the |
|
11 |
Software is furnished to do so, subject to the following |
|
12 |
conditions: |
|
13 |
||
14 |
The above copyright notice and this permission notice shall be |
|
15 |
included in all copies or substantial portions of the Software. |
|
16 |
||
17 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, |
|
18 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
19 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
20 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
21 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
22 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
23 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
24 |
OTHER DEALINGS IN THE SOFTWARE. |
|
10014 | 25 |
" |
26 |
"{ Package: 'stx:libtool' }" |
|
27 |
||
28 |
Object subclass:#Diff |
|
29 |
instanceVariableNames:'equivMax heuristic nodiscards xvec yvec fdiag bdiag fdiagoff |
|
30 |
bdiagoff filevec cost snakeLimit inhibit' |
|
31 |
classVariableNames:'' |
|
32 |
poolDictionaries:'' |
|
12287 | 33 |
category:'Collections-Support' |
10014 | 34 |
! |
35 |
||
36 |
Link subclass:#Change |
|
37 |
instanceVariableNames:'inserted deleted line0 line1' |
|
38 |
classVariableNames:'' |
|
39 |
poolDictionaries:'' |
|
40 |
privateIn:Diff |
|
41 |
! |
|
42 |
||
43 |
Object subclass:#Data |
|
44 |
instanceVariableNames:'bufferedLines equivs undiscarded realindexes nondiscardedLines |
|
45 |
changedFlag' |
|
46 |
classVariableNames:'' |
|
47 |
poolDictionaries:'' |
|
48 |
privateIn:Diff |
|
49 |
! |
|
50 |
||
51 |
Object subclass:#ForwardScript |
|
52 |
instanceVariableNames:'' |
|
53 |
classVariableNames:'' |
|
54 |
poolDictionaries:'' |
|
55 |
privateIn:Diff |
|
56 |
! |
|
57 |
||
58 |
Object subclass:#ReverseScript |
|
59 |
instanceVariableNames:'' |
|
60 |
classVariableNames:'' |
|
61 |
poolDictionaries:'' |
|
62 |
privateIn:Diff |
|
63 |
! |
|
64 |
||
65 |
!Diff class methodsFor:'documentation'! |
|
66 |
||
67 |
copyright |
|
68 |
" |
|
10089 | 69 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague |
70 |
Copyright (c) 2009-2010 eXept Software AG |
|
71 |
||
72 |
Permission is hereby granted, free of charge, to any person |
|
73 |
obtaining a copy of this software and associated documentation |
|
74 |
files (the 'Software'), to deal in the Software without |
|
75 |
restriction, including without limitation the rights to use, |
|
76 |
copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
77 |
copies of the Software, and to permit persons to whom the |
|
78 |
Software is furnished to do so, subject to the following |
|
79 |
conditions: |
|
10014 | 80 |
|
10089 | 81 |
The above copyright notice and this permission notice shall be |
82 |
included in all copies or substantial portions of the Software. |
|
83 |
||
84 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, |
|
85 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
86 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
87 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
88 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
89 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
90 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
91 |
OTHER DEALINGS IN THE SOFTWARE. |
|
10014 | 92 |
" |
93 |
! |
|
94 |
||
95 |
documentation |
|
96 |
" |
|
97 |
I'm a standard diff implementation written purely in Smalltalk. I can |
|
98 |
compute differences between two sequenceable collections, not necessarily |
|
99 |
holding strings. Elements are compared using #=. |
|
100 |
||
101 |
Result of comparison is an edit script, a linked list of Diff::Changes, |
|
102 |
each keeping one difference: whether change is insert and/or delete, |
|
103 |
and positions in A and B. |
|
104 |
||
105 |
I'm a port of Java diff. |
|
106 |
||
107 |
[author:] |
|
108 |
Jakub Zelenka (zelenj7@fel.cvut.cz) |
|
109 |
Vladislav Skoumal (skoumal@skoumal.net) |
|
110 |
Jan Vrany (jan.vrany@fit.cvut.cz) |
|
111 |
||
112 |
[instance variables:] |
|
113 |
||
114 |
[class variables:] |
|
115 |
||
116 |
[see also:] |
|
117 |
||
118 |
" |
|
119 |
! |
|
120 |
||
121 |
documentation_czech |
|
122 |
" |
|
123 |
první fáze: |
|
124 |
############################################################################################################################# |
|
125 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
|
126 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
127 |
############################################################################################################################ |
|
128 |
First a second představujou dvě pole, které chceme porovnávat. Jednotlivé položky v poli si lze představit jako řádky, případně jako slova v řádku. |
|
129 |
Podle toho, co je potřeba porovnávat. |
|
130 |
||
131 |
***************************************************************************************************************************** |
|
132 |
diff := FelDiff new felDiff. |
|
133 |
***************************************************************************************************************************** |
|
134 |
Zde probíhá inicializace defaultních proměnných. Funguje to jako konstruktor. |
|
135 |
||
136 |
############################################################################################################################ |
|
137 |
diff diff: first b: second |
|
138 |
############################################################################################################################ |
|
139 |
První fáze nutná pro porovnávání polí. Vzniknou dvě instance třídy filedata uložené do pole. Tyto instance budou obsahovat následující údaje: |
|
140 |
||
141 |
filevec[1].equivs=#(1 2 3 3 4 5 6) |
|
142 |
filevec[1].bufferedLines=7 |
|
143 |
filevec[1].changedFlag=#() |
|
144 |
||
145 |
filevec[2].equivs=#(1 3 6 7 3 8 9 10 10 11 12 13 14 15 10 10) |
|
146 |
filevec[2].bufferedLines=16 |
|
147 |
filevec[2].changedFlag=#() |
|
148 |
||
149 |
V zásadě se vytvořila struktura Dictionary, která jednotlivé řádky(slova) převedla na čísla. Pole equivs pak představuje číselně slova(řádky). |
|
150 |
Čísla, která se nalézají v obou dvou polích equivs, značí, že soubory sdílí alespoň nějaké slovo(řádek). |
|
151 |
||
152 |
***************************************************************************************************************************** |
|
153 |
change:= diff diff2: true. |
|
154 |
***************************************************************************************************************************** |
|
155 |
||
156 |
Zde již dochází k porovnání obou dvou polí s řádky(slovy). Lze si vybrat mezi forwardscriptem a reversescriptem. |
|
157 |
||
158 |
1) metoda discardconfusinglines |
|
159 |
výsledek: |
|
160 |
filevec[1].undiscardeded=#(1 3 3 5 6 0 0) |
|
161 |
filevec[1].realIndexes= #(0 2 3 5 6 0 0) |
|
162 |
filevec[1].nondiscardedLines=5 |
|
163 |
filevec[1].changedFlag=#(false false true false false true false false false) |
|
164 |
||
165 |
filevec[2].undiscardeded=#(1 3 5 6 3 0 0 0 0 0 0 0 0 0 0 0) |
|
166 |
filevec[2].realIndexes= #(0 1 2 3 4 0 0 0 0 0 0 0 0 0 0 0) |
|
167 |
filevec[2].nondiscardedLines=5 |
|
168 |
||
169 |
Undiscarded- Řádky souboru, které jsou shodné. |
|
170 |
RealIndexes - indexy řádků v poli(je potřeba přičíst jedna) |
|
171 |
- to znamená že index prvního 3->3 pozice v prvním vstupním poli |
|
172 |
- index druhého 3->2 pozice v druhém vstupním poli a 3->5 pozice v druhém vstupním poli |
|
173 |
NondiscardedLines- značí kolik řádků(slov) je shodných v obou polích. |
|
174 |
||
175 |
2)Následuje porovnávání jednotlivých polí a vytvoření výsledku |
|
176 |
||
177 |
3)Výsledek Reverse skript |
|
178 |
||
179 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa') |
|
180 |
deleted=0 |
|
181 |
line0=7('ruzovy') |
|
182 |
line1=4('ruzovy') |
|
183 |
link=next |
|
184 |
||
185 |
inserted- kolik znaků bylo vloženo |
|
186 |
deleted - kolik znaků bylo smazáno |
|
187 |
line0 - pořadí znaku za kterým bylo něco vloženo(smazáno) v prvním poli(poslední znak který je shodný v obou polích) |
|
188 |
line1 - pořadí znaku za kterým bylo něco smazáno(vloženo) v prvním poli(poslední znak který je shodný v obou polích) |
|
189 |
||
190 |
Takže po znaku na pozici 4, je 12 vložených znaků oproti prvnímu |
|
191 |
||
192 |
Zbytek pole vypadá takto: |
|
193 |
||
194 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
|
195 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
196 |
||
197 |
link není null a tudíž odkazuje na další informace o změnách. |
|
198 |
inserted=0 |
|
199 |
deleted=2('treti' 'paty') |
|
200 |
line0=3('treti') |
|
201 |
line1=2('treti') |
|
202 |
link=next |
|
203 |
||
204 |
zbytek pole vypadá takto: |
|
205 |
first := #('prvni' 'druhy' 'treti' 'zeleny' 'ruzovy' ). |
|
206 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
207 |
||
208 |
link není null a tudíž odkazuje na další informace o změnách. |
|
209 |
inserted=0 |
|
210 |
deleted=1('druhy') |
|
211 |
line0=1('prvni') |
|
212 |
line1=1('prvni') |
|
213 |
link=nil |
|
214 |
||
215 |
zbytek pole vypadá takto: |
|
216 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
|
217 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
218 |
||
219 |
link je nil. Neexistuje žádná změna a tato pole jsou shodná. |
|
220 |
||
221 |
4)Výsledek Forward skript |
|
222 |
||
223 |
inserted=0 |
|
224 |
deleted=1('druhy') |
|
225 |
line0=1('prvni') |
|
226 |
line1=1('prvni') |
|
227 |
link=next |
|
228 |
||
229 |
zbytek pole vypadá takto: |
|
230 |
first := #('prvni' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ). |
|
231 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
232 |
||
233 |
link není nil jdeme na odkaz: |
|
234 |
inserted=0 |
|
235 |
deleted=2('treti' 'paty') |
|
236 |
line0=3('treti') |
|
237 |
line1=2('treti') |
|
238 |
link=next |
|
239 |
||
240 |
zbytek pole vypadá takto: |
|
241 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
|
242 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ). |
|
243 |
||
244 |
link není nil jdeme na odkaz: |
|
245 |
||
246 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa') |
|
247 |
deleted=0 |
|
248 |
line0=7('ruzovy') |
|
249 |
line1=4('ruzovy') |
|
250 |
link=nil |
|
251 |
||
252 |
zbytek pole vypadá takto: |
|
253 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ). |
|
254 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy'). |
|
255 |
||
256 |
Konec |
|
257 |
" |
|
258 |
! ! |
|
259 |
||
260 |
!Diff class methodsFor:'instance creation'! |
|
261 |
||
262 |
new
    "Answer a fresh instance whose state has been set up by #initialize."

    | instance |

    instance := self basicNew.
    ^ instance initialize
|
266 |
! ! |
|
267 |
||
268 |
!Diff class methodsFor:'diffing'! |
|
269 |
||
270 |
between: a and: b
    "Compare the collections a and b and answer the resulting edit script.
     Delegates to #between:and:reverse: with reverse set to false, which
     selects the ForwardScript builder."

    ^ self
        between: a
        and: b
        reverse: false
|
273 |
||
274 |
"Created: / 16-02-2010 / 23:08:55 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
275 |
! |
|
276 |
||
277 |
between: a and: b reverse: reverse
    "Create a new differ, feed it the collections a and b and answer
     whatever #diff: answers for the given reverse flag."

    | differ |

    differ := self new.
    differ a: a b: b.
    ^ differ diff: reverse
|
282 |
||
283 |
"Created: / 16-02-2010 / 23:04:50 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
284 |
! ! |
|
285 |
||
286 |
!Diff methodsFor:'diffing'! |
|
287 |
||
288 |
a:gA b:gB
    "Prepare to find differences between the two collections gA and gB.
     One Data object is created per input; both are handed the same
     Dictionary so that they can share it while processing their
     elements. The Data for gA is stored in slot 1 of filevec, the one
     for gB in slot 2.
     (Notes from the original port: each element is translated to an
      'equivalence number'; the original collections are not needed for
      computing the differences, only for printing an edit script later.)"

    | sharedTable inputs data |

    sharedTable := Dictionary new:(gA size + gB size).
    inputs := Array with:gA with:gB.
    1 to:inputs size do:[:slot |
        data := Data new.
        data fileData.
        data
            fileData:(inputs at:slot)
            hashTable:sharedTable
            felDiff:self.
        self filevec at:slot put:data
    ].
|
315 |
||
316 |
"Modified: / 12-02-2010 / 14:22:56 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
317 |
! |
|
318 |
||
319 |
diff
    "Answer the edit script built in forward order
     (same as #diff: with a false argument)."

    ^ self diff:false
|
322 |
||
323 |
"Created: / 16-02-2010 / 22:50:26 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
324 |
! |
|
325 |
||
326 |
diff:reverse
    "Answer the edit script for the previously supplied inputs.
     A ReverseScript builder is used when reverse is true,
     a ForwardScript builder otherwise."

    | builder |

    builder := reverse
        ifTrue:[ReverseScript new]
        ifFalse:[ForwardScript new].
    ^ self diffUsingScript:builder
|
331 |
||
332 |
"Modified: / 16-02-2010 / 22:51:43 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
333 |
! |
|
334 |
||
335 |
diffUsingScript:bld
    "Run the comparison and answer what bld builds from the result.

     bld must understand #buildScript:length0:changed1:length1: and is
     handed the changed flags and the line counts of both inputs; its
     answer (the head of a list of changes for the standard builders)
     is answered unchanged.

     Steps:
       1. lines without a match in the other input are discarded up
          front, so the main algorithm never has to look at them;
       2. the remaining (undiscarded) lines are compared;
       3. change boundaries are adjusted by #shiftBoundaries;
       4. the script is built from the changed flags."

    | fileA fileB countA countB diagonalCount |

    self discardConfusingLines.

    fileA := filevec at:1.
    fileB := filevec at:2.
    xvec := fileA undiscarded.
    yvec := fileB undiscarded.
    countA := fileA nondiscardedLines.
    countB := fileB nondiscardedLines.

    "Working storage for the diagonal search; only needed while comparing."
    diagonalCount := countA + countB + 3.
    fdiag := Array new:diagonalCount withAll:0.
    fdiagoff := countB + 1.
    bdiag := Array new:diagonalCount withAll:0.
    bdiagoff := countB + 1.

    self
        compareseq:0
        xlim:countA
        yoff:0
        ylim:countB.

    fdiag := nil.
    bdiag := nil.

    self shiftBoundaries.

    ^ bld
        buildScript:fileA changedFlag
        length0:fileA bufferedLines
        changed1:fileB changedFlag
        length1:fileB bufferedLines
|
375 |
||
376 |
"Modified: / 12-02-2010 / 13:57:09 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
377 |
! ! |
|
378 |
||
379 |
!Diff methodsFor:'initialization'! |
|
380 |
||
381 |
initialize
    "Constructor: give the instance variables their default values.
     fdiagoff, bdiagoff and cost are not touched here."

    "Flags"
    heuristic := false.
    nodiscards := false.
    inhibit := false.

    "Numeric settings"
    equivMax := 1.
    snakeLimit := 20.

    "Working storage"
    xvec := Array new.
    yvec := Array new.
    fdiag := Array new.
    bdiag := Array new.
    filevec := Array new:2.
|
394 |
||
395 |
"Modified: / 16-02-2010 / 22:51:04 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
396 |
! ! |
|
397 |
||
398 |
!Diff methodsFor:'private'! |
|
399 |
||
400 |
compareseq:gXoff xlim:gXlim yoff:gYoff ylim:gYlim
    "Compare in detail contiguous subsequences of the two files
     which are known, as a whole, to match each other.

     The subsequence of the first file is [gXoff, gXlim), the one of the
     second file is [gYoff, gYlim); the limits are exclusive, all offsets
     are origin-0 and discarded lines are not counted.

     The result is recorded in the changedFlag arrays of the two entries
     of filevec: true is stored for each line that is an insertion or
     a deletion. The flag index is the real line index plus 2, because
     real indexes are origin-0 and changedFlag has one extra leading
     element."

    | lowX highX lowY highY side diagonal editCost splitX |

    lowX := gXoff.
    highX := gXlim.
    lowY := gYoff.
    highY := gYlim.

    "Skip the common head."
    [lowX < highX
        and:[lowY < highY
        and:[(xvec at:lowX + 1) = (yvec at:lowY + 1)]]
    ] whileTrue:[
        lowX := lowX + 1.
        lowY := lowY + 1
    ].

    "Skip the common tail."
    [highX > lowX
        and:[highY > lowY
        and:[(xvec at:highX) = (yvec at:highY)]]
    ] whileTrue:[
        highX := highX - 1.
        highY := highY - 1
    ].

    "Simple case: nothing left in the first file, so whatever
     remains of the second file was inserted."
    lowX = highX ifTrue:[
        side := filevec at:2.
        lowY to:highY - 1 do:[:y |
            side changedFlag at:2 + (side realindexes at:y + 1) put:true
        ].
        ^ self
    ].

    "Simple case: nothing left in the second file, so whatever
     remains of the first file was deleted."
    lowY = highY ifTrue:[
        side := filevec at:1.
        lowX to:highX - 1 do:[:x |
            side changedFlag at:2 + (side realindexes at:x + 1) put:true
        ].
        ^ self
    ].

    "General case: find a point of correspondence in the middle of the files."
    diagonal := self diag:lowX xlim:highX yoff:lowY ylim:highY.
    editCost := cost.
    "Fetched before recursing, since the recursive calls reuse bdiag."
    splitX := bdiag at:bdiagoff + diagonal + 1.

    editCost = 1 ifTrue:[
        "This should be impossible: it would imply that one of the two
         subsequences is empty, and that case was handled above
         without calling diag."
        Exception new signal.
        ^ self
    ].

    "Use that point to split the problem into two subproblems.
     The backward value is used for both halves, because diagonal d
     does not necessarily have a snake from the backward to the
     forward position."
    self compareseq:lowX xlim:splitX yoff:lowY ylim:splitX - diagonal.
    self compareseq:splitX xlim:highX yoff:splitX - diagonal ylim:highY.
|
482 |
! |
|
483 |
||
484 |
diag: anXoff xlim: anXlim yoff: aYoff ylim: aYlim
    "Find a point of correspondence in the middle of the subsequences
     [anXoff, anXlim) of xvec and [aYoff, aYlim) of yvec (origin-0 offsets,
     exclusive limits).

     A top-down search starting at (anXoff, aYoff) and a bottom-up search
     starting at (anXlim, aYlim) are extended alternately, one edit step
     per round. For every diagonal d = x - y, fdiag and bdiag record the
     x position reached by the respective search; they are addressed as
     (fdiagoff + d + 1) resp. (bdiagoff + d + 1), the + 1 being needed
     because Smalltalk arrays are origin-1.

     Answers the diagonal on which the two searches met and stores the
     edit cost in the instance variable cost.

     If heuristic is true, then after more than 200 rounds in which a
     snake longer than snakeLimit was seen, a diagonal that has made a
     lot of progress may be answered early as if the search had succeeded."

    |fd bd xv yv dmin dmax fmid bmid fmin fmax bmin bmax odd c d bigsnake
     tlo thi x oldx y best bestpos dd v k scanning|

    fd := fdiag.                "Local aliases for the shared vectors."
    bd := bdiag.
    xv := xvec.
    yv := yvec.
    dmin := anXoff - aYlim.     "Minimum valid diagonal."
    dmax := anXlim - aYoff.     "Maximum valid diagonal."
    fmid := anXoff - aYoff.     "Center diagonal of top-down search."
    bmid := anXlim - aYlim.     "Center diagonal of bottom-up search."
    fmin := fmid.               "Limits of top-down search."
    fmax := fmid.
    bmin := bmid.               "Limits of bottom-up search."
    bmax := bmid.

    "True if southeast corner is on an odd diagonal with respect to the northwest."
    odd := (fmid - bmid) odd.

    fd at:(fdiagoff + fmid + 1) put: anXoff.
    bd at:(bdiagoff + bmid + 1) put: anXlim.

    c := 1.
    "The loop is only left through one of the explicit returns."
    [true] whileTrue:[
        bigsnake := false.

        "Extend the top-down search by an edit step in each diagonal."
        (fmin > dmin) ifTrue:[
            fmin := fmin - 1.
            fd at:(fdiagoff + fmin - 1 + 1) put: -1.
        ] ifFalse:[
            fmin := fmin + 1.
        ].
        (fmax < dmax) ifTrue:[
            fmax := fmax + 1.
            fd at:(fdiagoff + fmax + 1 + 1) put: -1.
        ] ifFalse:[
            fmax := fmax - 1.
        ].

        d := fmax.
        [d >= fmin] whileTrue:[
            tlo := fd at:(fdiagoff + d - 1 + 1).
            thi := fd at:(fdiagoff + d + 1 + 1).
            x := (tlo >= thi) ifTrue:[tlo + 1] ifFalse:[thi].
            oldx := x.
            y := x - d.
            "Follow the snake downwards."
            [(x < anXlim) and:[(y < aYlim) and:[(xv at:(x + 1)) = (yv at:(y + 1))]]] whileTrue:[
                x := x + 1.
                y := y + 1.
            ].
            ((x - oldx) > snakeLimit) ifTrue:[
                bigsnake := true.
            ].
            fd at:(fdiagoff + d + 1) put: x.
            (odd and:[bmin <= d and:[d <= bmax and:[(bd at:(bdiagoff + d + 1)) <= (fd at:(fdiagoff + d + 1))]]]) ifTrue:[
                cost := (2 * c) - 1.
                ^ d.
            ].
            d := d - 2.
        ].

        "Similarly extend the bottom-up search."
        (bmin > dmin) ifTrue:[
            bmin := bmin - 1.
            bd at:(bdiagoff + bmin - 1 + 1) put: 2147483647.
        ] ifFalse:[
            bmin := bmin + 1.
        ].
        (bmax < dmax) ifTrue:[
            bmax := bmax + 1.
            bd at:(bdiagoff + bmax + 1 + 1) put: 2147483647.
        ] ifFalse:[
            bmax := bmax - 1.
        ].

        d := bmax.
        [d >= bmin] whileTrue:[
            tlo := bd at:(bdiagoff + d - 1 + 1).
            thi := bd at:(bdiagoff + d + 1 + 1).
            x := (tlo < thi) ifTrue:[tlo] ifFalse:[thi - 1].
            oldx := x.
            y := x - d.
            "Follow the snake upwards."
            [(x > anXoff) and:[(y > aYoff) and:[(xv at:(x - 1 + 1)) = (yv at:(y - 1 + 1))]]] whileTrue:[
                x := x - 1.
                y := y - 1.
            ].
            "x only decreases in this search, so the snake length is
             oldx - x (the former x - oldx could never exceed snakeLimit)."
            ((oldx - x) > snakeLimit) ifTrue:[
                bigsnake := true.
            ].
            bd at:(bdiagoff + d + 1) put: x.
            (odd not and:[fmin <= d and:[d <= fmax and:[(bd at:(bdiagoff + d + 1)) <= (fd at:(fdiagoff + d + 1))]]]) ifTrue:[
                cost := 2 * c.
                ^ d.
            ].
            d := d - 2.
        ].

        "Heuristic: check occasionally for a diagonal that has made
         lots of progress compared with the edit distance.
         If we have any such, find the one that has made the most
         progress and return it as if it had succeeded.

         With this heuristic, for files with a constant small density
         of changes, the algorithm is linear in the file size."
        ((c > 200) and:[bigsnake and:[heuristic]]) ifTrue:[
            best := 0.
            bestpos := -1.
            d := fmax.
            [d >= fmin] whileTrue:[
                dd := d - fmid.
                x := fd at:(fdiagoff + d + 1).
                y := x - d.
                v := ((x - anXoff) * 2) - dd.
                (v > (((dd abs) + c) * 12)) ifTrue:[
                    ((v > best) and:[(anXoff + snakeLimit <= x) and:[(x < anXlim) and:[(aYoff + snakeLimit <= y) and:[y < aYlim]]]]) ifTrue:[
                        "We have a good enough best diagonal;
                         now insist that it end with a significant snake."
                        k := 1.
                        scanning := true.
                        [((xvec at:(x - k + 1)) = (yvec at:(y - k + 1))) and:[scanning]] whileTrue:[
                            (k = snakeLimit) ifTrue:[
                                best := v.
                                bestpos := d.
                                scanning := false.
                            ] ifFalse:[
                                k := k + 1.
                            ].
                        ].
                    ].
                ].
                d := d - 2.
            ].
            (best > 0) ifTrue:[
                cost := (2 * c) - 1.
                ^ bestpos.
            ].

            best := 0.
            d := bmax.
            [d >= bmin] whileTrue:[
                dd := d - bmid.
                x := bd at:(bdiagoff + d + 1).
                y := x - d.
                v := ((anXlim - x) * 2) + dd.
                (v > (((dd abs) + c) * 12)) ifTrue:[
                    ((v > best) and:[(anXoff < x) and:[(x <= (anXlim - snakeLimit)) and:[(aYoff < y) and:[y <= (aYlim - snakeLimit)]]]]) ifTrue:[
                        "We have a good enough best diagonal;
                         now insist that it end with a significant snake.
                         NOTE(review): k starts at 0 and runs up to snakeLimit, so
                         the element at origin-0 position x + snakeLimit is read,
                         which can be position anXlim itself - confirm against
                         the reference implementation."
                        k := 0.
                        scanning := true.
                        [((xvec at:(x + k + 1)) = (yvec at:(y + k + 1))) and:[scanning]] whileTrue:[
                            (k = snakeLimit) ifTrue:[
                                best := v.
                                bestpos := d.
                                scanning := false.
                            ] ifFalse:[
                                k := k + 1.
                            ].
                        ].
                    ].
                ].
                d := d - 2.
            ].
            (best > 0) ifTrue:[
                cost := (2 * c) - 1.
                ^ bestpos.
            ].
        ].
        c := c + 1.
    ]
|
648 |
! |
|
649 |
||
650 |
discardConfusingLines
    "Discard lines from one file that have no matches in the other file.
     Each of the two Data objects in filevec is processed in turn and is
     given the other one for reference: first slot 1, then slot 2."

    (filevec at:1) discardConfusingLines:(filevec at:2) felDiff:self.
    (filevec at:2) discardConfusingLines:(filevec at:1) felDiff:self.
|
659 |
! |
|
660 |
||
661 |
equivMax
    "Answer the value of equivMax (1 after #initialize).
     Data>>equivCount: uses it as the size of its counting array."

    ^ equivMax
|
663 |
! |
|
664 |
||
665 |
equivMax:something
    "Set equivMax to something."

    equivMax := something
|
667 |
! |
|
668 |
||
669 |
filevec
    "Answer the two-element Array whose slots #a:b: fills with the Data
     for the first and for the second compared collection."

    ^ filevec
|
671 |
! |
|
672 |
||
673 |
nodiscards
    "Answer the nodiscards flag (false after #initialize).
     Data>>discard:felDiff: keeps every line when it is true."

    ^ nodiscards
|
675 |
! |
|
676 |
||
677 |
nodiscards:something
    "Set the nodiscards flag to something."

    nodiscards := something
|
679 |
! |
|
680 |
||
681 |
shiftBoundaries
    "Adjust inserts/deletes of blank lines to join changes as much as
     possible. Nothing is done (and nil is answered) when inhibit is true;
     otherwise each Data in filevec is asked to shift its boundaries with
     respect to the other one, slot 1 first."

    inhibit ifTrue:[^ nil].

    (filevec at:1) shiftBoundaries:(filevec at:2).
    (filevec at:2) shiftBoundaries:(filevec at:1).
|
694 |
! ! |
|
695 |
||
696 |
!Diff::Change class methodsFor:'documentation'! |
|
697 |
||
698 |
documentation |
|
699 |
" |
|
700 |
The result of comparison is an ""edit script"": a chain of change objects. |
|
701 |
Each change represents one place where some lines are deleted |
|
702 |
and some are inserted. |
|
703 |
||
704 |
LINE0 and LINE1 are the first affected lines in the two files (origin 0). |
|
705 |
DELETED is the number of lines deleted here from file 0. |
|
706 |
INSERTED is the number of lines inserted here in file 1. |
|
707 |
||
708 |
If DELETED is 0 then LINE0 is the number of the line before |
|
709 |
which the insertion was done; vice versa for INSERTED and LINE1. |
|
710 |
" |
|
711 |
! ! |
|
712 |
||
713 |
!Diff::Change methodsFor:'accessing'! |
|
714 |
||
715 |
deleted
    "Answer the number of lines deleted here from file 0
     (a count, not a line number; see the class documentation)."

    ^ deleted
|
718 |
! |
|
719 |
||
720 |
inserted
    "Answer the number of lines inserted here in file 1
     (see the class documentation)."

    ^ inserted
|
723 |
! |
|
724 |
||
725 |
line0
    "Answer the first affected line in file 0 (origin 0),
     i.e. the line number of the first deleted line."

    ^ line0
|
728 |
! |
|
729 |
||
730 |
line1
    "Answer the first affected line in file 1 (origin 0),
     i.e. the line number of the first inserted line."

    ^ line1
|
733 |
! ! |
|
734 |
||
735 |
!Diff::Change methodsFor:'enumerating'! |
|
736 |
||
737 |
do: aBlock
    "Evaluate aBlock with the receiver and then with every change that
     follows it in the chain of nextLink references, in order."

    | current |

    current := self.
    [current isNil] whileFalse:[
        aBlock value:current.
        current := current nextLink
    ].
|
744 |
||
745 |
"Created: / 16-02-2010 / 22:53:40 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
746 |
! ! |
|
747 |
||
748 |
!Diff::Change methodsFor:'instance creation'! |
|
749 |
||
750 |
newLine0:aLine0 line1:aLine1 deleted:aDeleted inserted:aInserted next: nextChange
    "Fill in the receiver so that it becomes the new front entry of the
     edit script nextChange.

     aLine0 and aLine1 are the first affected lines in the two files
     (origin 0), aDeleted is the number of lines deleted here from file 0
     and aInserted is the number of lines inserted here in file 1.

     If aDeleted is 0 then aLine0 is the number of the line before which
     the insertion was done; vice versa for aInserted and aLine1."

    nextLink := nextChange.
    inserted := aInserted.
    deleted := aDeleted.
    line1 := aLine1.
    line0 := aLine0.
|
764 |
||
765 |
"Modified: / 12-02-2010 / 13:42:30 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
766 |
! ! |
|
767 |
||
768 |
!Diff::Data methodsFor:'accessing'! |
|
769 |
||
770 |
bufferedLines
    "Answer bufferedLines, the line count that #clear, #discard:felDiff:,
     #discardable: and #equivCount: use as their loop bound or array size."

    ^ bufferedLines
|
772 |
! |
|
773 |
||
774 |
changedFlag
    "Answer the Array of change flags. #clear allocates it with
     bufferedLines + 2 elements, all false; true marks a changed line."

    ^ changedFlag
|
776 |
! |
|
777 |
||
778 |
nondiscardedLines
    "Answer the number of lines kept by #discard:felDiff:
     (0 after #fileData)."

    ^ nondiscardedLines
|
780 |
! |
|
781 |
||
782 |
realindexes
    "Answer the table in which #discard:felDiff: stores, for every kept
     line, its origin-0 index among all lines."

    ^ realindexes
|
784 |
! |
|
785 |
||
786 |
undiscarded
    "Answer the table in which #discard:felDiff: stores the entries of
     equivs that belong to the kept lines."

    ^ undiscarded
|
788 |
! ! |
|
789 |
||
790 |
!Diff::Data methodsFor:'default'! |
|
791 |
||
792 |
clear
    "Allocate the changed array for the results of the comparison:
     one flag per line, saying whether that line is an insertion or
     deletion, plus one extra element at each end of the vector.
     All flags start out false."

    | flagCount |

    flagCount := bufferedLines + 2.
    changedFlag := Array new:flagCount withAll:false
|
799 |
||
800 |
"Modified: / 12-02-2010 / 13:55:52 / Jan Vrany <jan.vrany@fit.cvut.cz>" |
|
801 |
! |
|
802 |
||
803 |
discard:discards felDiff:fellDiffClass
    "Actually discard the lines.

     discards holds one entry per line; a line is kept when its entry is 0
     or when fellDiffClass answers true to #nodiscards.
     For every kept line, its entry of equivs is appended to undiscarded
     and its origin-0 line index is appended to realindexes.
     Every other line is flagged true in changedFlag (index shifted by one
     for the extra leading element of that array).
     Afterwards nondiscardedLines holds the number of kept lines."

    | kept |

    kept := 0.
    0 to:bufferedLines - 1 do:[:i |
        (fellDiffClass nodiscards or:[(discards at:i + 1) = 0]) ifTrue:[
            undiscarded at:kept + 1 put:(equivs at:i + 1).
            realindexes at:kept + 1 put:i.
            kept := kept + 1
        ] ifFalse:[
            changedFlag at:i + 1 + 1 put:true
        ]
    ].

    "Assigned once, after the loop, so that the count is also
     updated (to zero) when there are no lines at all."
    nondiscardedLines := kept.
|
821 |
! |
|
822 |
||
823 |
discardConfusingLines: f felDiff: felDiff
    "Discard lines that have no matches in another file; f is the other file.

     A line which is discarded will not be considered by the actual
     comparison algorithm; it will be as if that line were not in the file.
     The file's realindexes table maps virtual line numbers
     (which don't count the discarded lines) into real line numbers;
     this is how the actual comparison algorithm produces results
     that are comprehensible when the discarded lines are counted.

     When a line is discarded, it is also marked as a deletion or
     insertion so that it will be printed in the output."

    | otherCounts verdicts |

    self clear.

    "Set up the table of which lines are going to be discarded."
    otherCounts := f equivCount:felDiff.
    verdicts := self discardable:otherCounts.

    "Provisional lines are not really discarded except when they occur
     in a run of discardables, with nonprovisionals at the beginning
     and end."
    self filterDiscards:verdicts.

    "Actually discard the lines."
    self discard:verdicts felDiff:felDiff.
|
851 |
! |
|
852 |
||
853 |
discardable: counts
    "Mark to be discarded each line that matches no line of another file.
     If a line matches many lines, mark it as provisionally discardable.

     counts holds the count of each equivalence number for the other
     file (see #equivCount:); it is indexed with equivalence number + 1.
     Answers an Array with one entry per line:
     0 = nondiscardable, 1 = discardable, 2 = provisionally discardable.
     Lines whose equivalence number is 0 are always left at 0."

    | lineCount verdicts threshold scale equivalence matches |

    lineCount := bufferedLines.
    verdicts := Array new:lineCount withAll:0.

    "Multiply the threshold by the approximate square root of the number
     of lines. That is the threshold for provisionally discardable lines."
    threshold := 5.
    scale := (lineCount / 64) asInteger >> 2.
    [scale > 0] whileTrue:[
        threshold := threshold * 2.
        scale := scale >> 2
    ].

    1 to:lineCount do:[:line |
        equivalence := equivs at:line.
        equivalence = 0 ifFalse:[
            matches := counts at:equivalence + 1.
            matches = 0
                ifTrue:[verdicts at:line put:1]
                ifFalse:[
                    matches > threshold ifTrue:[verdicts at:line put:2]
                ]
        ]
    ].

    ^ verdicts
|
890 |
! |
|
891 |
||
892 |
equivCount: felDiff
    "Answer an Array telling how many buffered lines of the receiver
     carry each equivalence number.

     felDiff - supplies the table size via #equivMax.

     The count for equivalence number n is kept at index n + 1."

    | tally slot |

    "#new:withAll: already zero-fills the table, so no separate
     clearing pass is needed."
    tally := Array new: felDiff equivMax withAll: 0.

    1 to: bufferedLines do:[:lineNo |
        slot := (equivs at: lineNo) + 1.
        tally at: slot put: (tally at: slot) + 1
    ].
    ^ tally

    "Modified: / 12-02-2010 / 13:56:10 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
914 |
||
915 |
fileData
    "Constructor-style initializer: give the receiver empty line tables
     and a zero count of non-discarded lines."

    nondiscardedLines := 0.
    changedFlag := Array new: 0.
    realindexes := Array new: 0.
    undiscarded := Array new: 0.
    equivs := Array new: 0
!
|
923 |
||
924 |
fileData: data hashTable: h felDiff:fellDiffClass
    "Initialize the receiver from the lines in data and assign every
     line its equivalence number.

     data          - indexable collection of lines
     h             - dictionary mapping a line to its equivalence number;
                     it is extended with every line not yet present
     fellDiffClass - holder of the running equivMax counter, which is
                     read and advanced for each previously unseen line

     Lines that compare equal as keys of h end up with the same number
     in equivs."

    | line known fresh |

    bufferedLines := data size.
    equivs := Array new: bufferedLines withAll: 0.
    undiscarded := Array new: bufferedLines withAll: 0.
    realindexes := Array new: bufferedLines withAll: 0.

    1 to: bufferedLines do:[:lineNo |
        line := data at: lineNo.
        "Use a real block as the absent handler instead of relying on
         nil answering itself to #value."
        known := h at: line ifAbsent:[nil].
        known isNil
            ifTrue:[
                "Unseen line: take the next free equivalence number."
                fresh := fellDiffClass equivMax.
                fellDiffClass equivMax: fresh + 1.
                equivs at: lineNo put: fresh.
                h at: line put: fresh
            ]
            ifFalse:[
                equivs at: lineNo put: known
            ]
    ].

    "Modified: / 12-02-2010 / 13:56:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
!
|
947 |
||
948 |
filterDiscards:discards
    "Don't really discard the provisional lines except when they occur
     in a run of discardables, with nonprovisionals at the beginning
     and end.

     discards - table with one mark per buffered line
                (0 = keep, 1 = discard, 2 = provisionally discard).
                It is modified in place.

     Indices i and j are kept 0-based as in the algorithm this was
     ported from; every table access therefore adds 1."

    |end i j length provisional bool consec minimum tem mark|

    end := bufferedLines.
    i := 0.
    [ i < end ] whileTrue:[
        "A missing entry at the scan position counts as 'keep'."
        (discards at:i + 1) isNil ifTrue:[
            discards at:i + 1 put:0
        ].
        mark := discards at:i + 1.
        mark = 2 ifTrue:[
            "Cancel provisional discards not in middle of run of discards."
            discards at:i + 1 put:0
        ] ifFalse:[
            mark = 0 ifFalse:[
                "We have found a nonprovisional discard."
                provisional := 0.
                j := i.
                bool := true.
                "Find end of this run of discardable lines.
                 Count how many are provisionally discardable."
                [ bool and:[ j < end ] ] whileTrue:[
                    (discards at:j + 1) = 2 ifTrue:[
                        provisional := provisional + 1
                    ].
                    (discards at:j + 1) = 0 ifTrue:[
                        bool := false
                    ] ifFalse:[
                        j := j + 1
                    ]
                ].
                "Cancel provisional discards at end, and shrink the run."
                [ j > i and:[ (discards at:j) = 2 ] ] whileTrue:[
                    j := j - 1.
                    discards at:j + 1 put:0.
                    provisional := provisional - 1
                ].
                "Now we have the length of a run of discardable lines
                 whose first and last are not provisional."
                length := j - i.
                provisional * 4 > length ifTrue:[
                    "More than a quarter of the run is provisional:
                     cancel every provisional mark in the run."
                    [ j > i ] whileTrue:[
                        j := j - 1.
                        (discards at:j + 1) = 2 ifTrue:[
                            discards at:j + 1 put:0
                        ]
                    ]
                ] ifFalse:[
                    "MINIMUM is approximate square root of LENGTH/4.
                     A subrun of two or more provisionals can stand
                     when LENGTH is at least 16.
                     A subrun of 4 or more can stand when LENGTH >= 64."
                    minimum := 1.
                    tem := (length / 4) asInteger.
                    tem := tem >> 2.
                    [ tem > 0 ] whileTrue:[
                        minimum := minimum * 2.
                        tem := tem >> 2
                    ].
                    minimum := minimum + 1.
                    "Cancel any subrun of MINIMUM or more provisionals
                     within the larger run."
                    j := 0.
                    consec := 0.
                    [ j < length ] whileTrue:[
                        (discards at:i + j + 1) ~= 2 ifTrue:[
                            consec := 0
                        ] ifFalse:[
                            consec := consec + 1.
                            minimum = consec ifTrue:[
                                "Back up to start of subrun, to cancel it all.
                                 consec is left untouched, so the rescan sees
                                 consec > minimum and clears each entry."
                                j := j - consec
                            ] ifFalse:[
                                "Only subruns that reached MINIMUM are
                                 cancelled; shorter ones keep their
                                 provisional mark."
                                minimum < consec ifTrue:[
                                    discards at:i + j + 1 put:0
                                ]
                            ]
                        ].
                        j := j + 1
                    ].
                    "Scan from beginning of run
                     until we find 3 or more nonprovisionals in a row
                     or until the first nonprovisional at least 8 lines in.
                     Until that point, cancel any provisionals."
                    j := 0.
                    consec := 0.
                    bool := true.
                    [ bool and:[ j < length ] ] whileTrue:[
                        mark := discards at:i + j + 1.
                        (j >= 8 and:[ mark = 1 ]) ifTrue:[
                            bool := false
                        ] ifFalse:[
                            mark = 2 ifTrue:[
                                consec := 0.
                                discards at:i + j + 1 put:0
                            ] ifFalse:[
                                mark = 0 ifTrue:[
                                    consec := 0
                                ] ifFalse:[
                                    consec := consec + 1
                                ]
                            ]
                        ].
                        consec = 3 ifTrue:[
                            bool := false
                        ].
                        j := j + 1
                    ].
                    "I advances to the last line of the run."
                    i := i + length - 1.
                    "Same thing, from end."
                    j := 0.
                    consec := 0.
                    bool := true.
                    [ bool and:[ j < length ] ] whileTrue:[
                        mark := discards at:i - j + 1.
                        (j >= 8 and:[ mark = 1 ]) ifTrue:[
                            bool := false
                        ] ifFalse:[
                            mark = 2 ifTrue:[
                                consec := 0.
                                discards at:i - j + 1 put:0
                            ] ifFalse:[
                                mark = 0 ifTrue:[
                                    consec := 0
                                ] ifFalse:[
                                    consec := consec + 1
                                ]
                            ]
                        ].
                        consec = 3 ifTrue:[
                            bool := false
                        ].
                        j := j + 1
                    ]
                ]
            ]
        ].
        i := i + 1
    ]
!
|
1092 |
||
1093 |
shiftBoundaries:f
    "Adjust inserts/deletes of blank lines to join changes
     as much as possible.

     We do something when a run of changed lines include a blank
     line at one end and have an excluded blank line at the other.
     We are free to choose which blank line is included.
     `compareseq' always chooses the one at the beginning,
     but usually it is cleaner to consider the following blank line
     to be the change. The only exception is if the preceding blank line
     would join this change to other changes.

     f - the file being compared against

     Line positions are 0-based; the flag for line n is kept at
     index n + 2 of the changed-flag tables."

    |mine theirs here there lineCount preceding otherPreceding
     runStart runEnd otherStart|

    mine := changedFlag.
    theirs := f changedFlag.
    lineCount := bufferedLines.
    here := 0.
    there := 0.
    preceding := -1.
    otherPreceding := -1.

    [
        "Scan forwards to find beginning of another run of changes.
         Also keep track of the corresponding point in the other file."
        [ here < lineCount and:[ (mine at:here + 2) = false ] ] whileTrue:[
            "Non-corresponding lines in the other file
             will count as the preceding batch of changes."
            [ theirs at:there + 2 ] whileTrue:[
                there := there + 1.
                otherPreceding := there
            ].
            there := there + 1.
            here := here + 1
        ].
        "Keep going only while a further run of changes was found."
        here < lineCount
    ] whileTrue:[
        runStart := here.
        otherStart := there.
        [
            "Now find the end of this run of changes."
            [ here < lineCount and:[ mine at:here + 2 ] ] whileTrue:[
                here := here + 1
            ].
            runEnd := here.
            "If the first changed line matches the following unchanged one,
             and this run does not follow right after a previous run,
             and there are no lines deleted from the other file here,
             then classify the first changed line as unchanged
             and the following line as changed in its place.

             You might ask, how could this run follow right after another?
             Only because the previous run was shifted here."
            runEnd ~= lineCount
                and:[ (equivs at:runStart + 1) = (equivs at:runEnd + 1)
                and:[ (theirs at:there + 2) = false
                and:[ ((preceding >= 0 and:[ runStart = preceding ])
                        or:[ otherPreceding >= 0
                            and:[ otherStart = otherPreceding ] ]) = false ] ] ]
        ] whileTrue:[
            mine at:runEnd + 2 put:true.
            runEnd := runEnd + 1.
            mine at:runStart + 2 put:false.
            runStart := runStart + 1.
            "Since one line-that-matches is now before this run
             instead of after, we must advance in the other file
             to keep in synch."
            here := here + 1.
            there := there + 1
        ].
        preceding := here.
        otherPreceding := there
    ]
! !
|
1175 |
||
1176 |
!Diff::ForwardScript methodsFor:'default'! |
|
1177 |
||
1178 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
    "Scan the tables of which lines are inserted and deleted,
     producing an edit script in forward order.

     Both tables are walked backwards, starting at positions aLen0 and
     aLen1, and each new change is linked in front of the ones found so
     far. Answers the head of the resulting chain, or nil when no
     change flag is set."

    |chain pos0 pos1 mark0 mark1|

    chain := nil.
    pos0 := aLen0.
    pos1 := aLen1.
    [ pos0 < 0 and:[ pos1 < 0 ] ] whileFalse:[
        ((aChanged0 at:pos0 + 1) or:[ aChanged1 at:pos1 + 1 ]) ifTrue:[
            mark0 := pos0.
            mark1 := pos1.
            "Find # lines changed here in each file."
            [ aChanged0 at:pos0 + 1 ] whileTrue:[ pos0 := pos0 - 1 ].
            [ aChanged1 at:pos1 + 1 ] whileTrue:[ pos1 := pos1 - 1 ].
            "Record this change."
            chain := Diff::Change new
                        newLine0:pos0
                        line1:pos1
                        deleted:mark0 - pos0
                        inserted:mark1 - pos1
                        next:chain
        ].
        "We have reached lines in the two files that match each other."
        pos0 := pos0 - 1.
        pos1 := pos1 - 1
    ].
    ^ chain

    "Modified: / 16-02-2010 / 22:49:18 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
|
1208 |
||
1209 |
!Diff::ReverseScript methodsFor:'default'! |
|
1210 |
||
1211 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
    "Scan the tables of which lines are inserted and deleted,
     producing an edit script in reverse order.

     Both tables are walked forwards from position 0 (the flag for
     position n is read at index n + 2), and each new change is linked
     in front of the ones found so far. Answers the head of the
     resulting chain, or nil when no change flag is set."

    |chain pos0 pos1 mark0 mark1|

    chain := nil.
    pos0 := 0.
    pos1 := 0.
    [ pos0 >= aLen0 and:[ pos1 >= aLen1 ] ] whileFalse:[
        ((aChanged0 at:pos0 + 2) or:[ aChanged1 at:pos1 + 2 ]) ifTrue:[
            mark0 := pos0.
            mark1 := pos1.
            "Find # lines changed here in each file."
            [ aChanged0 at:pos0 + 2 ] whileTrue:[ pos0 := pos0 + 1 ].
            [ aChanged1 at:pos1 + 2 ] whileTrue:[ pos1 := pos1 + 1 ].
            "Record this change."
            chain := Diff::Change new
                        newLine0:mark0
                        line1:mark1
                        deleted:pos0 - mark0
                        inserted:pos1 - mark1
                        next:chain
        ].
        "We have reached lines in the two files that match each other."
        pos0 := pos0 + 1.
        pos1 := pos1 + 1
    ].
    ^ chain

    "Modified: / 12-02-2010 / 14:15:27 / Jan Vrany <jan.vrany@fit.cvut.cz>"
! !
|
1241 |
||
1242 |
!Diff class methodsFor:'documentation'! |
|
1243 |
||
1244 |
version_CVS
    "Answer the CVS header string recorded for this source file."

    ^ '§Header: /cvs/stx/stx/libtool/Diff.st,v 1.3 2012/07/27 20:14:58 cg Exp §'
!
1247 |
||
1248 |
version_SVN
    "Answer the SVN id string recorded for this source file."

    ^ '$Id: Diff.st 8048 2012-09-07 17:28:09Z vranyj1 $'
! !