10014
|
1 |
"
|
10089
|
2 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague
|
|
3 |
Copyright (c) 2009-2010 eXept Software AG
|
10014
|
4 |
|
10089
|
5 |
Permission is hereby granted, free of charge, to any person
|
|
6 |
obtaining a copy of this software and associated documentation
|
|
7 |
files (the 'Software'), to deal in the Software without
|
|
8 |
restriction, including without limitation the rights to use,
|
|
9 |
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10 |
copies of the Software, and to permit persons to whom the
|
|
11 |
Software is furnished to do so, subject to the following
|
|
12 |
conditions:
|
|
13 |
|
|
14 |
The above copyright notice and this permission notice shall be
|
|
15 |
included in all copies or substantial portions of the Software.
|
|
16 |
|
|
17 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
18 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24 |
OTHER DEALINGS IN THE SOFTWARE.
|
10014
|
25 |
"
|
|
26 |
"{ Package: 'stx:libtool' }"
|
|
27 |
|
16884
|
28 |
"{ NameSpace: Smalltalk }"
|
|
29 |
|
10014
|
30 |
Object subclass:#Diff
|
|
31 |
instanceVariableNames:'equivMax heuristic nodiscards xvec yvec fdiag bdiag fdiagoff
|
|
32 |
bdiagoff filevec cost snakeLimit inhibit'
|
|
33 |
classVariableNames:''
|
|
34 |
poolDictionaries:''
|
11701
|
35 |
category:'Collections-Support'
|
10014
|
36 |
!
|
|
37 |
|
|
38 |
Link subclass:#Change
|
|
39 |
instanceVariableNames:'inserted deleted line0 line1'
|
|
40 |
classVariableNames:''
|
|
41 |
poolDictionaries:''
|
|
42 |
privateIn:Diff
|
|
43 |
!
|
|
44 |
|
|
45 |
Object subclass:#Data
|
|
46 |
instanceVariableNames:'bufferedLines equivs undiscarded realindexes nondiscardedLines
|
|
47 |
changedFlag'
|
|
48 |
classVariableNames:''
|
|
49 |
poolDictionaries:''
|
|
50 |
privateIn:Diff
|
|
51 |
!
|
|
52 |
|
|
53 |
Object subclass:#ForwardScript
|
|
54 |
instanceVariableNames:''
|
|
55 |
classVariableNames:''
|
|
56 |
poolDictionaries:''
|
|
57 |
privateIn:Diff
|
|
58 |
!
|
|
59 |
|
|
60 |
Object subclass:#ReverseScript
|
|
61 |
instanceVariableNames:''
|
|
62 |
classVariableNames:''
|
|
63 |
poolDictionaries:''
|
|
64 |
privateIn:Diff
|
|
65 |
!
|
|
66 |
|
|
67 |
!Diff class methodsFor:'documentation'!
|
|
68 |
|
|
69 |
copyright
|
|
70 |
"
|
10089
|
71 |
Copyright (c) 2007-2010 Jan Vrany, SWING Research Group, Czech Technical University in Prague
|
|
72 |
Copyright (c) 2009-2010 eXept Software AG
|
|
73 |
|
|
74 |
Permission is hereby granted, free of charge, to any person
|
|
75 |
obtaining a copy of this software and associated documentation
|
|
76 |
files (the 'Software'), to deal in the Software without
|
|
77 |
restriction, including without limitation the rights to use,
|
|
78 |
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
79 |
copies of the Software, and to permit persons to whom the
|
|
80 |
Software is furnished to do so, subject to the following
|
|
81 |
conditions:
|
10014
|
82 |
|
10089
|
83 |
The above copyright notice and this permission notice shall be
|
|
84 |
included in all copies or substantial portions of the Software.
|
|
85 |
|
|
86 |
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
87 |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
88 |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
89 |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
90 |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
91 |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
92 |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
93 |
OTHER DEALINGS IN THE SOFTWARE.
|
10014
|
94 |
"
|
|
95 |
!
|
|
96 |
|
|
97 |
documentation
|
|
98 |
"
|
|
99 |
I'm standard diff implementation written purely in Smalltalk. I can
|
17505
|
100 |
compute differences between two sequenceable collections, not necessarily
|
10014
|
101 |
holding strings. Elements are compared using #=.
|
|
102 |
|
|
103 |
Result of comparison is an edit script, a linked list of Diff::Changes,
|
|
104 |
each keeping one difference: whether change is insert and/or delete,
|
|
105 |
and positions in A and B.
|
|
106 |
|
|
107 |
I'm a port of Java diff.
|
|
108 |
|
|
109 |
[author:]
|
|
110 |
Jakub Zelenka (zelenj7@fel.cvut.cz)
|
|
111 |
Vladislav Skoumal (skoumal@skoumal.net)
|
|
112 |
Jan Vrany (jan.vrany@fit.cvut.cz)
|
|
113 |
|
|
114 |
[instance variables:]
|
|
115 |
|
|
116 |
[class variables:]
|
|
117 |
|
|
118 |
[see also:]
|
|
119 |
|
|
120 |
"
|
|
121 |
!
|
|
122 |
|
|
123 |
documentation_czech
|
|
124 |
"
|
17505
|
125 |
první fáze:
|
10014
|
126 |
#############################################################################################################################
|
|
127 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ).
|
|
128 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ).
|
|
129 |
############################################################################################################################
|
17505
|
130 |
First a second pøedstavujou dvì pole, které chceme porovnávat. Jednotlivé položky v poli si lze pøedstavit jako øádky, pøípadnì jako slova v øádku.
|
|
131 |
Podle toho, co je potøeba porovnávat.
|
10014
|
132 |
|
|
133 |
*****************************************************************************************************************************
|
|
134 |
diff := FelDiff new felDiff.
|
|
135 |
*****************************************************************************************************************************
|
17505
|
136 |
Zde probíhá inicializace defaultníh promìnných. Funguje to jako konstruktor.
|
10014
|
137 |
|
|
138 |
############################################################################################################################
|
|
139 |
diff diff: first b: second
|
|
140 |
############################################################################################################################
|
17505
|
141 |
První fáze nutná pro porovnávání polí. Vzniknou dvì instance tøíde filedata uložené do pole. Tyto instance budou obsahovat následující údaje:
|
10014
|
142 |
|
|
143 |
filevec[1].equivs=#(1 2 3 3 4 5 6)
|
|
144 |
filevec[1].bufferedLines=7
|
|
145 |
filevec[1].changedFlag=#()
|
|
146 |
|
|
147 |
filevec[2].equivs=#(1 3 6 7 3 8 9 10 10 11 12 13 14 15 10 10)
|
|
148 |
filevec[2].bufferedLines=16
|
|
149 |
filevec[2].changedFlag=#()
|
|
150 |
|
17505
|
151 |
V zásadì se vytvoøila structura Dictionary, která jednotlivé øádky(slova) pøevedla na èísla. Pole equvs pak pøedstavuje èíselnì slova(øádky).
|
|
152 |
èísla, která se nalézají v obou dbou polí equivs znaèí, že soubory sdílí alespoò nìjaké slovo(øádek).
|
10014
|
153 |
|
|
154 |
*****************************************************************************************************************************
|
|
155 |
change:= diff diff2: true.
|
|
156 |
*****************************************************************************************************************************
|
|
157 |
|
17505
|
158 |
Zde již dochází k porovnání obou dvou polí s øádky(slovy). Lze si vybrat mezi forwardscriptem a reversescriptem.
|
10014
|
159 |
|
|
160 |
1) metoda discardconfusinglines
|
17505
|
161 |
výsledek:
|
10014
|
162 |
filevec[1].undiscardeded=#(1 3 3 5 6 0 0)
|
|
163 |
filevec[1].realIndexes= #(0 2 3 5 6 0 0)
|
|
164 |
filevec[1].nondiscardedLines=5
|
|
165 |
filevec[1].changedFlag=#(false false true false false true false false false)
|
|
166 |
|
|
167 |
filevec[2].undiscardeded=#(1 3 5 6 3 0 0 0 0 0 0 0 0 0 0 0)
|
|
168 |
filevec[2].realIndexes= #(0 1 2 3 4 0 0 0 0 0 0 0 0 0 0 0)
|
|
169 |
filevec[2].nondiscardedLines=5
|
|
170 |
|
17505
|
171 |
Undiscarded- Øádky soubory, které jsou shodné.
|
|
172 |
RealIndexes - indexy øádkù v poli(je potøeba pøièíst jedna)
|
|
173 |
- to znamená že index prvního 3->3 pozice v prvním vstupním poli
|
|
174 |
- index druhého 3->2 pozice v druhém vstupním poli a 3->5 pozice v druhém vstupním poli
|
|
175 |
NondiscardedLines- znaèí kolik èádkù(slov) je shodných v obou polích.
|
10014
|
176 |
|
17505
|
177 |
2)Následuje porovnávání jednotlivých polí a vytvoøení výsledku
|
10014
|
178 |
|
17505
|
179 |
3)Výsledek Reverse skript
|
10014
|
180 |
|
|
181 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa')
|
|
182 |
deleted=0
|
|
183 |
line0=7('ruzovy')
|
|
184 |
line1=4('ruzovy')
|
|
185 |
link=next
|
|
186 |
|
17505
|
187 |
inserted- kolik znakù bylo vloženo
|
|
188 |
deleted - kolik znakù bylo smazáno
|
|
189 |
line0 - poøadí znaku za kterým bylo nìco vloženo(smazáno) v prvním poli(poslední znak který je shodný v obou polích)
|
|
190 |
line1 - poøadí znaku za kterým bylo nìco smazáno(vloženo) v prvním poli(poslední znak který je shodný v obou polích)
|
10014
|
191 |
|
17505
|
192 |
Takže po znaku na pozici 4, je 12 vložených znakù oproti prvnímu
|
10014
|
193 |
|
17505
|
194 |
Zbytek pole vypadá takto:
|
10014
|
195 |
|
|
196 |
first := #('prvni' 'druhy' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ).
|
|
197 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy').
|
|
198 |
|
17505
|
199 |
link není null a tudíž odkazuje na další informace o zmìnách.
|
10014
|
200 |
inserted=0
|
|
201 |
deleted=2('treti' 'paty')
|
|
202 |
line0=3('treti')
|
|
203 |
line1=2('treti')
|
|
204 |
link=next
|
|
205 |
|
17505
|
206 |
zbytek pole vypadá takto:
|
10014
|
207 |
first := #('prvni' 'druhy' 'treti' 'zeleny' 'ruzovy' ).
|
|
208 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy').
|
|
209 |
|
17505
|
210 |
link není null a tudíž odkazuje na další informace o zmìnách.
|
10014
|
211 |
inserted=0
|
|
212 |
deleted=1('druhy')
|
|
213 |
line0=1('prvni')
|
|
214 |
line1=1('prvni')
|
|
215 |
link=nil
|
|
216 |
|
17505
|
217 |
zbytek pole vypadá takto:
|
10014
|
218 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ).
|
|
219 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy').
|
|
220 |
|
17505
|
221 |
link je nil. Neexistuje žádná zmìna a tato pole jsou shodná.
|
10014
|
222 |
|
17505
|
223 |
4)Výsledek Forward skript
|
10014
|
224 |
|
|
225 |
inserted=0
|
|
226 |
deleted=1('druhy')
|
|
227 |
line0=1('prvni')
|
|
228 |
line1=1('prvni')
|
|
229 |
link=next
|
|
230 |
|
17505
|
231 |
zbytek pole vypadá takto:
|
10014
|
232 |
first := #('prvni' 'treti' 'treti' 'paty' 'zeleny' 'ruzovy' ).
|
|
233 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ).
|
|
234 |
|
17505
|
235 |
link není nil jdeme na odkaz:
|
10014
|
236 |
inserted=0
|
|
237 |
deleted=2('treti' 'paty')
|
|
238 |
line0=3('treti')
|
|
239 |
line1=2('treti')
|
|
240 |
link=next
|
|
241 |
|
17505
|
242 |
zbytek pole vypadá takto:
|
10014
|
243 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ).
|
|
244 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy' 'treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa' ).
|
|
245 |
|
17505
|
246 |
link není nil jdeme na odkaz:
|
10014
|
247 |
|
|
248 |
inserted=12('treti' 'bbb' 'ccc' 'aaa' 'aaa' 'hhh' 'iii' 'mmm' 'nnn' 'ppp' 'aaa' 'aaa')
|
|
249 |
deleted=0
|
|
250 |
line0=7('ruzovy')
|
|
251 |
line1=4('ruzovy')
|
|
252 |
link=nil
|
|
253 |
|
17505
|
254 |
zbytek pole vypadá takto:
|
10014
|
255 |
first := #('prvni' 'treti' 'zeleny' 'ruzovy' ).
|
|
256 |
second := #('prvni' 'treti' 'zeleny' 'ruzovy').
|
|
257 |
|
|
258 |
Konec
|
|
259 |
"
|
|
260 |
! !
|
|
261 |
|
|
262 |
!Diff class methodsFor:'instance creation'!
|
|
263 |
|
|
264 |
new
|
|
265 |
"return an initialized instance"
|
|
266 |
|
|
267 |
^ self basicNew initialize.
|
|
268 |
! !
|
|
269 |
|
|
270 |
!Diff class methodsFor:'diffing'!
|
|
271 |
|
|
272 |
between: a and: b
|
|
273 |
|
|
274 |
^self between: a and: b reverse: false
|
|
275 |
|
|
276 |
"Created: / 16-02-2010 / 23:08:55 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
277 |
!
|
|
278 |
|
|
279 |
between: a and: b reverse: reverse
|
|
280 |
|
|
281 |
^self new
|
|
282 |
a: a b: b;
|
|
283 |
diff: reverse
|
|
284 |
|
|
285 |
"Created: / 16-02-2010 / 23:04:50 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
286 |
! !
|
|
287 |
|
|
288 |
!Diff methodsFor:'diffing'!
|
|
289 |
|
|
290 |
a:gA b:gB
|
|
291 |
"Prepare to find differences between two arrays. Each element of
|
|
292 |
the arrays is translated to an"
|
|
293 |
"equivalence number"
|
|
294 |
" based on
|
|
295 |
the result of <code>equals</code>. The original Object arrays
|
|
296 |
are no longer needed for computing the differences. They will
|
|
297 |
be needed again later to print the results of the comparison as
|
|
298 |
an edit script, if desired."
|
|
299 |
|
|
300 |
|h data|
|
|
301 |
|
|
302 |
h := Dictionary new:(gA size + gB size).
|
|
303 |
data := Data new.
|
|
304 |
data fileData.
|
|
305 |
data
|
|
306 |
fileData:gA
|
|
307 |
hashTable:h
|
|
308 |
felDiff:self.
|
|
309 |
self filevec at:1 put:data.
|
|
310 |
data := Data new.
|
|
311 |
data fileData.
|
|
312 |
data
|
|
313 |
fileData:gB
|
|
314 |
hashTable:h
|
|
315 |
felDiff:self.
|
|
316 |
self filevec at:2 put:data.
|
|
317 |
|
|
318 |
"Modified: / 12-02-2010 / 14:22:56 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
319 |
!
|
|
320 |
|
|
321 |
diff
|
|
322 |
|
|
323 |
^self diff: false
|
|
324 |
|
|
325 |
"Created: / 16-02-2010 / 22:50:26 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
326 |
!
|
|
327 |
|
|
328 |
diff:reverse
|
|
329 |
|
|
330 |
^reverse
|
|
331 |
ifTrue:[self diffUsingScript: ReverseScript new]
|
|
332 |
ifFalse:[self diffUsingScript: ForwardScript new]
|
|
333 |
|
|
334 |
"Modified: / 16-02-2010 / 22:51:43 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
335 |
!
|
|
336 |
|
|
337 |
diffUsingScript:bld
|
|
338 |
"Get the results of comparison as an edit script. The script
|
|
339 |
is described by a list of changes. The standard ScriptBuilder
|
|
340 |
implementations provide for forward and reverse edit scripts.
|
|
341 |
Alternate implementations could, for instance, list common elements
|
|
342 |
instead of differences.
|
|
343 |
@param bld an object to build the script from change flags
|
|
344 |
@return the head of a list of changes
|
|
345 |
Some lines are obviously insertions or deletions
|
|
346 |
because they don't match anything. Detect them now,
|
|
347 |
and avoid even thinking about them in the main comparison algorithm."
|
|
348 |
|
|
349 |
|diags first second ret|
|
|
350 |
|
|
351 |
self discardConfusingLines.
|
|
352 |
"Now do the main comparison algorithm, considering just the
|
|
353 |
undiscarded lines."
|
|
354 |
first := filevec at:1.
|
|
355 |
second := filevec at:2.
|
|
356 |
xvec := first undiscarded.
|
|
357 |
yvec := second undiscarded.
|
|
358 |
diags := (first nondiscardedLines) + (second nondiscardedLines) + 3.
|
|
359 |
fdiag := Array new:diags withAll:0.
|
|
360 |
fdiagoff := second nondiscardedLines + 1.
|
|
361 |
bdiag := Array new:diags withAll:0.
|
|
362 |
bdiagoff := second nondiscardedLines + 1.
|
|
363 |
self
|
|
364 |
compareseq:0
|
|
365 |
xlim:first nondiscardedLines
|
|
366 |
yoff:0
|
|
367 |
ylim:second nondiscardedLines.
|
|
368 |
fdiag := nil.
|
|
369 |
bdiag := nil.
|
|
370 |
self shiftBoundaries.
|
|
371 |
ret := bld
|
|
372 |
buildScript:first changedFlag
|
|
373 |
length0:first bufferedLines
|
|
374 |
changed1:second changedFlag
|
|
375 |
length1:second bufferedLines.
|
|
376 |
^ ret.
|
|
377 |
|
|
378 |
"Modified: / 12-02-2010 / 13:57:09 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
379 |
! !
|
|
380 |
|
|
381 |
!Diff methodsFor:'initialization'!
|
|
382 |
|
|
383 |
initialize
|
|
384 |
"konstruktor"
|
|
385 |
|
|
386 |
equivMax := 1.
|
|
387 |
heuristic := false.
|
|
388 |
nodiscards := false.
|
16884
|
389 |
xvec := #().
|
|
390 |
yvec := #().
|
|
391 |
fdiag := #().
|
|
392 |
bdiag := #().
|
10014
|
393 |
filevec := Array new:2.
|
|
394 |
snakeLimit := 20.
|
|
395 |
inhibit := false.
|
|
396 |
|
|
397 |
"Modified: / 16-02-2010 / 22:51:04 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
398 |
! !
|
|
399 |
|
|
400 |
!Diff methodsFor:'private'!
|
|
401 |
|
|
402 |
compareseq:gXoff xlim:gXlim yoff:gYoff ylim:gYlim
|
|
403 |
"Compare in detail contiguous subsequences of the two files
|
|
404 |
which are known, as a whole, to match each other.
|
|
405 |
|
|
406 |
The results are recorded in the vectors filevec[N].changedflag, by
|
|
407 |
storing a 1 in the element for each line that is an insertion or deletion.
|
|
408 |
|
|
409 |
The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
|
|
410 |
|
|
411 |
Note that XLIM, YLIM are exclusive bounds.
|
|
412 |
All line numbers are origin-0 and discarded lines are not counted."
|
|
413 |
|
|
414 |
|xoff xlim yoff ylim c d f b|
|
|
415 |
|
|
416 |
xoff := gXoff.
|
|
417 |
xlim := gXlim.
|
|
418 |
yoff := gYoff.
|
|
419 |
ylim := gYlim.
|
|
420 |
|
|
421 |
"Slide down the bottom initial diagonal."
|
|
422 |
[(xoff < xlim) and: [(yoff < ylim) and: [(xvec at: xoff + 1) = (yvec at: yoff + 1)]]] whileTrue:
|
|
423 |
[
|
|
424 |
xoff := xoff + 1.
|
|
425 |
yoff := yoff + 1.
|
|
426 |
].
|
|
427 |
|
|
428 |
"Slide up the top initial diagonal."
|
|
429 |
[(xlim > xoff) and: [(ylim > yoff) and: [(xvec at: xlim) = (yvec at: ylim)]]] whileTrue:
|
|
430 |
[
|
|
431 |
xlim := xlim - 1.
|
|
432 |
ylim := ylim - 1.
|
|
433 |
].
|
|
434 |
|
|
435 |
"Handle simple cases."
|
|
436 |
|
|
437 |
(xoff = xlim) ifTrue:
|
|
438 |
[
|
|
439 |
[yoff < ylim] whileTrue:
|
|
440 |
[
|
|
441 |
((filevec at: 2) changedFlag) at: (2 + ((filevec at: 2) realindexes at: yoff+1)) put: true.
|
|
442 |
yoff := yoff + 1.
|
|
443 |
]
|
|
444 |
]
|
|
445 |
ifFalse:
|
|
446 |
[
|
|
447 |
(yoff = ylim) ifTrue:
|
|
448 |
[
|
|
449 |
[xoff < xlim] whileTrue:
|
|
450 |
[
|
|
451 |
((filevec at: 1) changedFlag) at: (2 + ((filevec at: 1) realindexes at: xoff+1)) put: true.
|
|
452 |
xoff := xoff + 1.
|
|
453 |
]
|
|
454 |
]
|
|
455 |
ifFalse:
|
|
456 |
[
|
|
457 |
"Find a point of correspondence in the middle of the files."
|
|
458 |
d := self diag: xoff xlim: xlim yoff: yoff ylim: ylim.
|
|
459 |
c := cost.
|
|
460 |
f := fdiag at: (fdiagoff + d+1).
|
|
461 |
b := bdiag at: (bdiagoff + d+1).
|
|
462 |
|
|
463 |
(c = 1) ifTrue:
|
|
464 |
[
|
|
465 |
"This should be impossible, because it implies that
|
|
466 |
one of the two subsequences is empty,
|
|
467 |
and that case was handled above without calling `diag'.
|
|
468 |
Let's verify that this is true."
|
|
469 |
d := Exception new.
|
12692
|
470 |
d raiseSignal.
|
10014
|
471 |
]
|
|
472 |
ifFalse:
|
|
473 |
[
|
|
474 |
"Use that point to split this problem into two subproblems."
|
|
475 |
self compareseq: xoff xlim: b yoff: yoff ylim: (b - d).
|
|
476 |
"This used to use f instead of b,
|
|
477 |
but that is incorrect!!
|
|
478 |
It is not necessarily the case that diagonal d
|
|
479 |
has a snake from b to f."
|
|
480 |
self compareseq: b xlim: xlim yoff: (b - d) ylim: ylim.
|
|
481 |
]
|
|
482 |
]
|
|
483 |
]
|
|
484 |
!
|
|
485 |
|
|
486 |
diag: anXoff xlim: anXlim yoff: aYoff ylim: aYlim
|
|
487 |
|fd bd xv yv dmin dmax fmid fmax bmid bmax fmin bmin odd c cont d bigsnake tlo thi x oldx y best bestpos dd v temp k cont2|
|
|
488 |
fd := fdiag. "Give the compiler a chance."
|
|
489 |
bd := bdiag. "Additional help for the compiler."
|
|
490 |
xv := xvec. "Still more help for the compiler."
|
|
491 |
yv := yvec. "And more and more . . ."
|
|
492 |
dmin := anXoff-aYlim. "Minimum valid diagonal."
|
|
493 |
dmax := anXlim-aYoff. "Maximum valid diagonal."
|
|
494 |
fmid := anXoff-aYoff. "Center diagonal of top-down search."
|
|
495 |
bmid := anXlim-aYlim. "Center diagonal of bottom-up search."
|
|
496 |
fmin := fmid. "Limits of top-down search."
|
|
497 |
fmax := fmid. " --||-- "
|
|
498 |
bmin := bmid. "Limits of bottom-up search."
|
|
499 |
bmax := bmid. " --||-- "
|
|
500 |
|
|
501 |
odd := (fmid-bmid) odd. "True if southeast corner is on an odd diagonal with respect to the northwest."
|
|
502 |
|
|
503 |
"Added + 1 to all arrays since StX uses index 1 as first"
|
|
504 |
fd at:(fdiagoff+fmid + 1) put: anXoff.
|
|
505 |
bd at:(bdiagoff+bmid + 1) put: anXlim.
|
|
506 |
|
|
507 |
c := 1.
|
|
508 |
cont := true.
|
|
509 |
[cont = true] whileTrue:[
|
|
510 |
d := nil. "Active diagonal."
|
|
511 |
bigsnake := false.
|
|
512 |
|
|
513 |
"Extend the top-down search by an edit step in each diagonal."
|
|
514 |
(fmin > dmin) ifTrue:[
|
|
515 |
fmin := fmin-1.
|
|
516 |
fd at:(fdiagoff + fmin - 1 + 1) put: -1.
|
|
517 |
] ifFalse:[ fmin := fmin + 1. ].
|
|
518 |
(fmax < dmax) ifTrue:[
|
|
519 |
fmax := fmax+1.
|
|
520 |
fd at:(fdiagoff + fmax + 1 + 1) put: -1.
|
|
521 |
] ifFalse:[ fmax := fmax - 1. ].
|
|
522 |
|
|
523 |
d := fmax.
|
|
524 |
[(d >= fmin)] whileTrue:[
|
|
525 |
tlo := fd at:(fdiagoff + d - 1 + 1).
|
|
526 |
thi := fd at:(fdiagoff + d + 1 + 1).
|
|
527 |
(tlo >= thi) ifTrue:[
|
|
528 |
x := tlo + 1.
|
|
529 |
] ifFalse:[ x := thi. ].
|
|
530 |
oldx := x.
|
|
531 |
y := x - d.
|
|
532 |
[(x < anXlim) and: [(y < aYlim) and: [((xv at: (x+1)) = (yv at: (y+1)))]]] whileTrue:[
|
|
533 |
x := x+1.
|
|
534 |
y := y+1.
|
|
535 |
].
|
|
536 |
((x-oldx) > snakeLimit) ifTrue:[
|
|
537 |
bigsnake := true.
|
|
538 |
].
|
|
539 |
fd at: (fdiagoff + d + 1) put: x.
|
|
540 |
(odd and: [bmin <= d and: [d <= bmax and:[(bd at:(bdiagoff + d + 1)) <= (fd at:(fdiagoff + d + 1))]]]) ifTrue:[
|
|
541 |
cost := (2 * c) - 1.
|
|
542 |
^d.
|
|
543 |
] ifFalse:[ d := d - 2.].
|
|
544 |
].
|
|
545 |
|
|
546 |
"Similar extend the bottom-up search."
|
|
547 |
(bmin > dmin) ifTrue:[
|
|
548 |
bmin := bmin - 1.
|
|
549 |
bd at:(bdiagoff + bmin - 1 + 1) put: 2147483647.
|
|
550 |
] ifFalse:[ bmin := bmin + 1.].
|
|
551 |
(bmax < dmax) ifTrue:[
|
|
552 |
bmax := bmax + 1.
|
|
553 |
bd at:(bdiagoff + bmax + 1 + 1) put: 2147483647.
|
|
554 |
] ifFalse:[ bmax := bmax - 1.].
|
|
555 |
|
|
556 |
d := bmax.
|
|
557 |
[(d >= bmin)] whileTrue:[
|
|
558 |
tlo := bd at:(bdiagoff + d - 1 + 1).
|
|
559 |
thi := bd at:(bdiagoff + d + 1 + 1).
|
|
560 |
(tlo < thi) ifTrue:[
|
|
561 |
x := tlo.
|
|
562 |
] ifFalse:[ x := thi - 1. ].
|
|
563 |
oldx := x.
|
|
564 |
y := x - d.
|
|
565 |
[(x > anXoff) and: [(y > aYoff) and: [((xv at: (x-1+1)) = (yv at: (y-1+1)))]]] whileTrue:[
|
|
566 |
x := x-1.
|
|
567 |
y := y-1.
|
|
568 |
].
|
|
569 |
((x-oldx) > snakeLimit) ifTrue:[
|
|
570 |
bigsnake := true.
|
|
571 |
].
|
|
572 |
bd at: (bdiagoff + d + 1) put: x.
|
|
573 |
((odd = false) and: [fmin <= d and: [d <= fmax and:[(bd at:(bdiagoff + d + 1)) <= (fd at:(fdiagoff + d + 1))]]]) ifTrue:[
|
|
574 |
cost := (2 * c).
|
|
575 |
^d.
|
|
576 |
] ifFalse:[ d := d - 2.].
|
|
577 |
].
|
|
578 |
|
|
579 |
"Heuristic: check occasionally for a diagonal that has made
|
|
580 |
lots of progress compared with the edit distance.
|
|
581 |
If we have any such, find the one that has made the most
|
|
582 |
progress and return it as if it had succeeded.
|
|
583 |
|
|
584 |
With this heuristic, for files with a constant small density
|
|
585 |
of changes, the algorithm is linear in the file size."
|
|
586 |
((c>200) and:[bigsnake and:[heuristic]]) ifTrue:[
|
|
587 |
best := 0.
|
|
588 |
bestpos := -1.
|
|
589 |
d := fmax.
|
|
590 |
[(d >= fmin)] whileTrue:[
|
|
591 |
dd := d - fmid.
|
|
592 |
x := fd at: (fdiagoff + d + 1).
|
|
593 |
y := x - d.
|
|
594 |
v := ((x - anXoff) * 2) - dd.
|
|
595 |
temp := ((dd abs) + c) * 12.
|
|
596 |
(v > temp) ifTrue:[
|
|
597 |
((v > best) and:[(anXoff + snakeLimit <= x) and:[(x < anXlim) and:[(aYoff + snakeLimit <= y) and:[(y < aYlim)]]]]) ifTrue:[
|
|
598 |
"We have a good enough best diagonal;
|
|
599 |
now insist that it end with a significant snake."
|
|
600 |
k := 1.
|
|
601 |
cont2 := true.
|
|
602 |
[(xvec at:(x-k + 1)) = (yvec at:(y-k + 1)) and:[cont2]] whileTrue:[
|
|
603 |
(k = snakeLimit) ifTrue:[
|
|
604 |
best := v.
|
|
605 |
bestpos := d.
|
|
606 |
cont2 := false.
|
|
607 |
] ifFalse:[ k := k + 1.].
|
|
608 |
].
|
|
609 |
].
|
|
610 |
].
|
|
611 |
d := d - 2.
|
|
612 |
].
|
|
613 |
(best > 0) ifTrue:[
|
|
614 |
cost := (2 * c) - 1.
|
|
615 |
^bestpos.
|
|
616 |
].
|
|
617 |
|
|
618 |
best := 0.
|
|
619 |
d := bmax.
|
|
620 |
[(d >= bmin)] whileTrue:[
|
|
621 |
dd := d - bmid.
|
|
622 |
x := bd at: (bdiagoff + d + 1).
|
|
623 |
y := x - d.
|
|
624 |
v := ((anXlim - x) * 2) + dd.
|
|
625 |
temp := ((dd abs) + c) * 12.
|
|
626 |
(v > temp) ifTrue:[
|
|
627 |
((v > best) and:[(anXoff < x) and:[(x <= (anXlim - snakeLimit)) and:[(aYoff < y) and:[(y <= (aYlim - snakeLimit))]]]]) ifTrue:[
|
|
628 |
"We have a good enough best diagonal;
|
|
629 |
now insist that it end with a significant snake."
|
|
630 |
k := 0.
|
|
631 |
cont2 := true.
|
|
632 |
[((xvec at:(x+k + 1)) = (yvec at:(y+k + 1))) and:[cont2]] whileTrue:[
|
|
633 |
(k = snakeLimit) ifTrue:[
|
|
634 |
best := v.
|
|
635 |
bestpos := d.
|
|
636 |
cont2 := false.
|
|
637 |
] ifFalse:[ k := k + 1.].
|
|
638 |
].
|
|
639 |
].
|
|
640 |
].
|
|
641 |
d := d - 2.
|
|
642 |
].
|
|
643 |
(best > 0) ifTrue:[
|
|
644 |
cost := (2 * c) - 1.
|
|
645 |
^bestpos.
|
|
646 |
].
|
|
647 |
].
|
|
648 |
c := c + 1.
|
|
649 |
]
|
|
650 |
!
|
|
651 |
|
|
652 |
discardConfusingLines
|
|
653 |
"Discard lines from one file that have no matches in the other file."
|
|
654 |
|
|
655 |
|first second|
|
|
656 |
|
|
657 |
first := filevec at:1.
|
|
658 |
second := filevec at:2.
|
|
659 |
first discardConfusingLines:second felDiff:self.
|
|
660 |
second discardConfusingLines: first felDiff:self.
|
|
661 |
!
|
|
662 |
|
|
663 |
equivMax
|
|
664 |
^ equivMax
|
|
665 |
!
|
|
666 |
|
|
667 |
equivMax:something
|
|
668 |
equivMax := something.
|
|
669 |
!
|
|
670 |
|
|
671 |
filevec
|
|
672 |
^ filevec
|
|
673 |
!
|
|
674 |
|
|
675 |
nodiscards
|
|
676 |
^ nodiscards
|
|
677 |
!
|
|
678 |
|
|
679 |
nodiscards:something
|
|
680 |
nodiscards := something.
|
|
681 |
!
|
|
682 |
|
|
683 |
shiftBoundaries
|
|
684 |
"Adjust inserts/deletes of blank lines to join changes
|
|
685 |
as much as possible."
|
|
686 |
|
|
687 |
|first second|
|
|
688 |
|
|
689 |
(inhibit) ifTrue:[
|
|
690 |
^ nil.
|
|
691 |
].
|
|
692 |
first := filevec at:1.
|
|
693 |
second := filevec at:2.
|
|
694 |
first shiftBoundaries:second.
|
|
695 |
second shiftBoundaries:first.
|
|
696 |
! !
|
|
697 |
|
|
698 |
!Diff::Change class methodsFor:'documentation'!
|
|
699 |
|
|
700 |
documentation
|
|
701 |
"
|
|
702 |
The result of comparison is an ""edit script"": a chain of change objects.
|
|
703 |
Each change represents one place where some lines are deleted
|
|
704 |
and some are inserted.
|
|
705 |
|
|
706 |
LINE0 and LINE1 are the first affected lines in the two files (origin 0).
|
|
707 |
DELETED is the number of lines deleted here from file 0.
|
|
708 |
INSERTED is the number of lines inserted here in file 1.
|
|
709 |
|
|
710 |
If DELETED is 0 then LINE0 is the number of the line before
|
|
711 |
which the insertion was done; vice versa for INSERTED and LINE1.
|
|
712 |
"
|
|
713 |
! !
|
|
714 |
|
|
715 |
!Diff::Change methodsFor:'accessing'!
|
|
716 |
|
|
717 |
deleted
|
|
718 |
"Line number of 1st deleted line."
|
|
719 |
^ deleted
|
|
720 |
!
|
|
721 |
|
|
722 |
inserted
|
|
723 |
"# lines of file 0 changed here."
|
|
724 |
^ inserted
|
|
725 |
!
|
|
726 |
|
|
727 |
line0
|
|
728 |
"Line number of 1st deleted line."
|
|
729 |
^ line0
|
|
730 |
!
|
|
731 |
|
|
732 |
line1
|
|
733 |
"Line number of 1st inserted line."
|
|
734 |
^ line1
|
|
735 |
! !
|
|
736 |
|
|
737 |
!Diff::Change methodsFor:'enumerating'!
|
|
738 |
|
|
739 |
do: aBlock
|
|
740 |
|
|
741 |
| chg |
|
|
742 |
chg := self.
|
|
743 |
[ chg notNil ] whileTrue:
|
|
744 |
[aBlock value: chg.
|
|
745 |
chg := chg nextLink].
|
|
746 |
|
|
747 |
"Created: / 16-02-2010 / 22:53:40 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
748 |
! !
|
|
749 |
|
|
750 |
!Diff::Change methodsFor:'instance creation'!
|
|
751 |
|
|
752 |
newLine0:aLine0 line1:aLine1 deleted:aDeleted inserted:aInserted next: nextChange
|
|
753 |
"Cons an additional entry onto the front of an edit script OLD.
|
|
754 |
LINE0 and LINE1 are the first affected lines in the two files (origin 0).
|
|
755 |
DELETED is the number of lines deleted here from file 0.
|
|
756 |
INSERTED is the number of lines inserted here in file 1.
|
|
757 |
|
|
758 |
If DELETED is 0 then LINE0 is the number of the line before
|
|
759 |
which the insertion was done; vice versa for INSERTED and LINE1."
|
|
760 |
|
|
761 |
line0 := aLine0.
|
|
762 |
line1 := aLine1.
|
|
763 |
deleted := aDeleted.
|
|
764 |
inserted := aInserted.
|
|
765 |
nextLink := nextChange.
|
|
766 |
|
|
767 |
"Modified: / 12-02-2010 / 13:42:30 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
768 |
! !
|
|
769 |
|
|
770 |
!Diff::Data methodsFor:'accessing'!
|
|
771 |
|
|
772 |
bufferedLines
|
|
773 |
^ bufferedLines
|
|
774 |
!
|
|
775 |
|
|
776 |
changedFlag
|
|
777 |
^ changedFlag
|
|
778 |
!
|
|
779 |
|
|
780 |
nondiscardedLines
|
|
781 |
^ nondiscardedLines
|
|
782 |
!
|
|
783 |
|
|
784 |
realindexes
|
|
785 |
^ realindexes
|
|
786 |
!
|
|
787 |
|
|
788 |
undiscarded
|
|
789 |
^ undiscarded
|
|
790 |
! !
|
|
791 |
|
|
792 |
!Diff::Data methodsFor:'default'!
|
|
793 |
|
|
794 |
clear
|
|
795 |
"Allocate changed array for the results of comparison.
|
|
796 |
Allocate a flag for each line of each file, saying whether that line
|
|
797 |
is an insertion or deletion. allocate an extra element, always zero,
|
|
798 |
at each end of each vector."
|
|
799 |
|
|
800 |
changedFlag := Array new:bufferedLines + 2 withAll:false
|
|
801 |
|
|
802 |
"Modified: / 12-02-2010 / 13:55:52 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
803 |
!
|
|
804 |
|
|
805 |
discard:discards felDiff:fellDiffClass
|
|
806 |
"Actually discard the lines.
|
|
807 |
@param discards flags lines to be discarded"
|
|
808 |
|end j i|
|
|
809 |
end:=bufferedLines.
|
|
810 |
j:=0.
|
|
811 |
i:=0.
|
|
812 |
[i<end]whileTrue:[
|
|
813 |
(fellDiffClass nodiscards or:[(discards at:i+1)=0])ifTrue:[
|
|
814 |
undiscarded at:j+1 put:(equivs at:i+1).
|
|
815 |
realindexes at:j+1 put:i.
|
|
816 |
j:=j+1.
|
|
817 |
]ifFalse:[
|
|
818 |
changedFlag at:(i+1+1) put:true.
|
|
819 |
].
|
|
820 |
nondiscardedLines :=j.
|
|
821 |
i:=i+1.
|
|
822 |
].
|
|
823 |
!
|
|
824 |
|
|
825 |
discardConfusingLines: f felDiff: felDiff
|
|
826 |
"
|
|
827 |
Discard lines that have no matches in another file.
|
|
828 |
|
|
829 |
A line which is discarded will not be considered by the actual
|
|
830 |
comparison algorithm; it will be as if that line were not in the file.
|
|
831 |
The file's `realindexes' table maps virtual line numbers
|
|
832 |
(which don't count the discarded lines) into real line numbers;
|
|
833 |
this is how the actual comparison algorithm produces results
|
|
834 |
that are comprehensible when the discarded lines are counted.
|
|
835 |
|
|
836 |
When we discard a line, we also mark it as a deletion or insertion
|
|
837 |
so that it will be printed in the output.
|
|
838 |
@param f the other file
|
|
839 |
"
|
|
840 |
| discarded |
|
|
841 |
self clear.
|
|
842 |
|
|
843 |
"Set up table of which lines are going to be discarded."
|
|
844 |
discarded := self discardable: (f equivCount: felDiff).
|
|
845 |
|
|
846 |
"Don't really discard the provisional lines except when they occur
|
|
847 |
in a run of discardables, with nonprovisionals at the beginning
|
|
848 |
and end."
|
|
849 |
self filterDiscards: discarded.
|
|
850 |
|
|
851 |
"Actually discard the lines."
|
|
852 |
self discard: discarded felDiff: felDiff.
|
|
853 |
!
|
|
854 |
|
|
855 |
discardable: counts
|
|
856 |
" Mark to be discarded each line that matches no line of another file.
|
|
857 |
If a line matches many lines, mark it as provisionally discardable.
|
|
858 |
@see equivCount()
|
|
859 |
@param counts The count of each equivalence number for the other file.
|
|
860 |
@return 0=nondiscardable, 1=discardable or 2=provisionally discardable
|
|
861 |
for each line"
|
|
862 |
| nmatch i end discards equivs2 many tem |
|
|
863 |
end := bufferedLines.
|
|
864 |
discards := Array new: end.
|
|
865 |
equivs2 := equivs.
|
|
866 |
many := 5.
|
|
867 |
tem := (end / 64).
|
|
868 |
tem :=tem asInteger.
|
|
869 |
tem := tem >> 2.
|
|
870 |
i:=1.
|
|
871 |
[i<=end]whileTrue:[discards at:i put:0.
|
|
872 |
i:=i+1.].
|
|
873 |
"Multiply MANY by approximate square root of number of lines.
|
|
874 |
That is the threshold for provisionally discardable lines. "
|
|
875 |
[tem > 0]
|
|
876 |
whileTrue: [many := many * 2.
|
|
877 |
tem := tem >> 2
|
|
878 |
].
|
|
879 |
i := 1.
|
|
880 |
[i <= end]
|
|
881 |
whileTrue: [(equivs2 at: i)
|
|
882 |
= 0
|
|
883 |
ifFalse: [nmatch := counts
|
|
884 |
at: (equivs2 at: i)+1.
|
|
885 |
nmatch = 0
|
|
886 |
ifTrue: [discards at: i put: 1]
|
|
887 |
ifFalse: [nmatch > many
|
|
888 |
ifTrue: [discards at: i put: 2]]].
|
|
889 |
i := i + 1].
|
|
890 |
|
|
891 |
^ discards
|
|
892 |
!
|
|
893 |
|
|
894 |
equivCount: felDiff
|
|
895 |
| pom i equivCount size|
|
|
896 |
equivCount := Array new: (felDiff equivMax) withAll: 0.
|
|
897 |
i:=1.
|
|
898 |
size:=equivCount size.
|
|
899 |
[i<=size]whileTrue:[
|
|
900 |
equivCount at:i put:0.
|
|
901 |
i:=i+1.
|
|
902 |
].
|
|
903 |
|
|
904 |
i := 0.
|
|
905 |
[i < bufferedLines]
|
|
906 |
whileTrue: [
|
|
907 |
pom:=equivs at: i+1.
|
|
908 |
pom := equivCount at: pom+1.
|
|
909 |
pom := pom + 1.
|
|
910 |
equivCount at: (equivs at: i+1)+1 put: pom.
|
|
911 |
i := i + 1.].
|
|
912 |
^ equivCount
|
|
913 |
|
|
914 |
"Modified: / 12-02-2010 / 13:56:10 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
915 |
!
|
|
916 |
|
|
917 |
fileData
|
|
918 |
"konstruktor"
|
16884
|
919 |
equivs := #().
|
|
920 |
undiscarded := #().
|
|
921 |
realindexes := #().
|
10014
|
922 |
nondiscardedLines := 0.
|
16884
|
923 |
changedFlag := #().
|
10014
|
924 |
!
|
|
925 |
|
|
926 |
fileData: data hashTable: h felDiff:fellDiffClass
|
|
927 |
| i size ir|
|
|
928 |
bufferedLines := data size.
|
|
929 |
|
|
930 |
equivs := Array new: bufferedLines withAll: 0.
|
|
931 |
|
|
932 |
undiscarded := Array new: bufferedLines withAll: 0.
|
|
933 |
|
|
934 |
realindexes := Array new: bufferedLines withAll: 0.
|
|
935 |
|
|
936 |
size := data size.
|
|
937 |
i := 1.
|
|
938 |
[i<=size]whileTrue: [ir := h at: (data at: i) ifAbsent: nil.
|
|
939 |
ir isNil
|
|
940 |
ifTrue: [
|
|
941 |
equivs at: i put:fellDiffClass equivMax.
|
|
942 |
fellDiffClass equivMax:( fellDiffClass equivMax + 1).
|
|
943 |
h at: (data at: i) put: (equivs at: i)]
|
|
944 |
ifFalse: [equivs at: i put: ir].
|
|
945 |
i:=i+1].
|
|
946 |
|
|
947 |
"Modified: / 12-02-2010 / 13:56:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
948 |
!
|
|
949 |
|
|
950 |
filterDiscards:discards
|
|
951 |
"Don't really discard the provisional lines except when they occur
|
|
952 |
in a run of discardables, with nonprovisionals at the beginning
|
|
953 |
and end."
|
|
954 |
|
|
955 |
|end i j length provisional bool consec minimum tem|
|
|
956 |
|
|
957 |
end := bufferedLines.
|
|
958 |
i := 0.
|
|
959 |
[ i < end ] whileTrue:[
|
|
960 |
"Cancel provisional discards not in middle of run of discards."
|
|
961 |
((discards at:i + 1) isNil) ifTrue:[
|
|
962 |
discards at:i + 1 put:0
|
|
963 |
].
|
|
964 |
(discards at:i + 1) = 2 ifTrue:[
|
|
965 |
discards at:i + 1 put:0
|
|
966 |
] ifFalse:[
|
|
967 |
(discards at:i + 1) = 0 ifFalse:[
|
|
968 |
"We have found a nonprovisional discard."
|
|
969 |
provisional := 0.
|
|
970 |
j := i.
|
|
971 |
bool := true.
|
|
972 |
"Find end of this run of discardable lines.
|
|
973 |
Count how many are provisionally discardable."
|
|
974 |
[ bool and:[ j < end ] ] whileTrue:[
|
|
975 |
(discards at:j + 1) = 2 ifTrue:[
|
|
976 |
provisional := provisional + 1
|
|
977 |
].
|
|
978 |
(discards at:j + 1) = 0 ifTrue:[
|
|
979 |
bool := false
|
|
980 |
] ifFalse:[ j := j + 1 ]
|
|
981 |
].
|
|
982 |
"Cancel provisional discards at end, and shrink the run."
|
|
983 |
[
|
|
984 |
j > i and:[ (discards at:j - 1 + 1) = 2 ]
|
|
985 |
] whileTrue:[
|
|
986 |
j := j - 1.
|
|
987 |
discards at:j + 1 put:0.
|
|
988 |
provisional := provisional - 1
|
|
989 |
].
|
|
990 |
"Now we have the length of a run of discardable lines
|
|
991 |
whose first and last are not provisional."
|
|
992 |
length := j - i.
|
|
993 |
(provisional * 4 > length) ifTrue:[
|
|
994 |
[ j > i ] whileTrue:[
|
|
995 |
j := j - 1.
|
|
996 |
(discards at:j + 1) = 2 ifTrue:[
|
|
997 |
discards at:j + 1 put:0
|
|
998 |
]
|
|
999 |
]
|
|
1000 |
] ifFalse:[
|
|
1001 |
"MINIMUM is approximate square root of LENGTH/4.
|
|
1002 |
A subrun of two or more provisionals can stand
|
|
1003 |
when LENGTH is at least 16.
|
|
1004 |
A subrun of 4 or more can stand when LENGTH >= 64."
|
|
1005 |
minimum := 1.
|
|
1006 |
tem := (length / 4) asInteger.
|
|
1007 |
tem := tem >> 2.
|
|
1008 |
[ tem > 0 ] whileTrue:[
|
|
1009 |
minimum := minimum * 2.
|
|
1010 |
tem := tem >> 2
|
|
1011 |
].
|
|
1012 |
minimum := minimum + 1.
|
|
1013 |
"Cancel any subrun of MINIMUM or more provisionals
|
|
1014 |
within the larger run."
|
|
1015 |
j := 0.
|
|
1016 |
consec := 0.
|
|
1017 |
[ j < length ] whileTrue:[
|
|
1018 |
(discards at:i + j + 1) ~= 2 ifTrue:[
|
|
1019 |
consec := 0
|
|
1020 |
] ifFalse:[
|
|
1021 |
consec := consec + 1.
|
|
1022 |
minimum = consec ifTrue:[
|
|
1023 |
"Back up to start of subrun, to cancel it all."
|
|
1024 |
j := j - consec
|
|
1025 |
] ifFalse:[
|
|
1026 |
discards at:i + j + 1 put:0
|
|
1027 |
]
|
|
1028 |
].
|
|
1029 |
j := j + 1
|
|
1030 |
].
|
|
1031 |
"Scan from beginning of run
|
|
1032 |
until we find 3 or more nonprovisionals in a row
|
|
1033 |
or until the first nonprovisional at least 8 lines in.
|
|
1034 |
Until that point, cancel any provisionals."
|
|
1035 |
j := 0.
|
|
1036 |
consec := 0.
|
|
1037 |
bool := true.
|
|
1038 |
[
|
|
1039 |
bool and:[ j < length ]
|
|
1040 |
] whileTrue:[
|
|
1041 |
(j >= 8 and:[ (discards at:i + j + 1) = 1 ]) ifTrue:[
|
|
1042 |
bool := false
|
|
1043 |
] ifFalse:[
|
|
1044 |
(discards at:i + j + 1) = 2 ifTrue:[
|
|
1045 |
consec := 0.
|
|
1046 |
discards at:i + j + 1 put:0
|
|
1047 |
] ifFalse:[
|
|
1048 |
(discards at:i + j + 1) = 0 ifTrue:[
|
|
1049 |
consec := 0
|
|
1050 |
] ifFalse:[
|
|
1051 |
consec := consec + 1
|
|
1052 |
]
|
|
1053 |
]
|
|
1054 |
].
|
|
1055 |
(consec = 3) ifTrue:[
|
|
1056 |
bool := false
|
|
1057 |
].
|
|
1058 |
j := j + 1
|
|
1059 |
].
|
|
1060 |
"I advances to the last line of the run."
|
|
1061 |
i := i + length - 1.
|
|
1062 |
bool := true.
|
|
1063 |
"Same thing, from end. "
|
|
1064 |
j := 0.
|
|
1065 |
consec := 0.
|
|
1066 |
[
|
|
1067 |
bool and:[ j < length ]
|
|
1068 |
] whileTrue:[
|
|
1069 |
(j >= 8 and:[ (discards at:i - j + 1) = 1 ]) ifTrue:[
|
|
1070 |
bool := false
|
|
1071 |
] ifFalse:[
|
|
1072 |
(discards at:i - j + 1) = 2 ifTrue:[
|
|
1073 |
consec := 0.
|
|
1074 |
discards at:i - j + 1 put:0
|
|
1075 |
] ifFalse:[
|
|
1076 |
(discards at:i - j + 1) = 0 ifTrue:[
|
|
1077 |
consec := 0
|
|
1078 |
] ifFalse:[
|
|
1079 |
consec := consec + 1
|
|
1080 |
]
|
|
1081 |
]
|
|
1082 |
].
|
|
1083 |
(consec = 3) ifTrue:[
|
|
1084 |
bool := false
|
|
1085 |
].
|
|
1086 |
j := j + 1
|
|
1087 |
]
|
|
1088 |
]
|
|
1089 |
]
|
|
1090 |
].
|
|
1091 |
i := i + 1.
|
|
1092 |
]
|
|
1093 |
!
|
|
1094 |
|
|
1095 |
shiftBoundaries:f
|
|
1096 |
"Adjust inserts/deletes of blank lines to join changes
|
|
1097 |
as much as possible.
|
|
1098 |
We do something when a run of changed lines include a blank
|
|
1099 |
line at one end and have an excluded blank line at the other.
|
|
1100 |
We are free to choose which blank line is included.
|
|
1101 |
`compareseq' always chooses the one at the beginning,
|
|
1102 |
but usually it is cleaner to consider the following blank line
|
|
1103 |
to be the change. The only exception is if the preceding blank line
|
|
1104 |
would join this change to other changes.
|
|
1105 |
param f the file being compared against"
|
|
1106 |
|
|
1107 |
|changed otherChanged i j iEnd preceding otherPreceding bool start end otherStart bool2|
|
|
1108 |
|
|
1109 |
changed := changedFlag.
|
|
1110 |
otherChanged := f changedFlag.
|
|
1111 |
i := 0.
|
|
1112 |
j := 0.
|
|
1113 |
iEnd := bufferedLines.
|
|
1114 |
preceding := -1.
|
|
1115 |
otherPreceding := -1.
|
|
1116 |
bool := true.
|
|
1117 |
bool2 := true.
|
|
1118 |
[ bool ] whileTrue:[
|
|
1119 |
[
|
|
1120 |
"Scan forwards to find beginning of another run of changes.
|
|
1121 |
Also keep track of the corresponding point in the other file. "
|
|
1122 |
i < iEnd and:[ ((changed at:(i + 1+1)) = false)]
|
|
1123 |
] whileTrue:[
|
|
1124 |
[otherChanged at:( 1 + j +1)] whileTrue:[
|
|
1125 |
"Non-corresponding lines in the other file
|
|
1126 |
will count as the preceding batch of changes."
|
|
1127 |
j := j + 1.
|
|
1128 |
otherPreceding := j.
|
|
1129 |
].
|
|
1130 |
j:=j+1.
|
|
1131 |
i := i + 1.
|
|
1132 |
].
|
|
1133 |
|
|
1134 |
(i >= iEnd) ifTrue:[
|
|
1135 |
bool := false.
|
|
1136 |
] ifFalse:[
|
|
1137 |
start := i.
|
|
1138 |
otherStart := j.
|
|
1139 |
bool2 := true.
|
|
1140 |
"Now find the end of this run of changes."
|
|
1141 |
[ bool2 ] whileTrue:[
|
|
1142 |
[i < iEnd and:[ changed at:(i + 1+1) ]]
|
|
1143 |
whileTrue:[ i := i + 1. ].
|
|
1144 |
end := i.
|
|
1145 |
"If the first changed line matches the following unchanged one,
|
|
1146 |
and this run does not follow right after a previous run,
|
|
1147 |
and there are no lines deleted from the other file here,
|
|
1148 |
then classify the first changed line as unchanged
|
|
1149 |
and the following line as changed in its place. */
|
|
1150 |
|
|
1151 |
/* You might ask, how could this run follow right after another?
|
|
1152 |
Only because the previous run was shifted here."
|
|
1153 |
(end ~= iEnd and:[((equivs at:start+1) = (equivs at:end+1))
|
|
1154 |
and:[((otherChanged at:(j + 1+1)) = false)
|
|
1155 |
and:[false = ((preceding >= 0 and:[start = preceding]) or:[ otherPreceding >= 0 and:[ otherStart = otherPreceding ]])
|
|
1156 |
]
|
|
1157 |
]
|
|
1158 |
])
|
|
1159 |
ifTrue:[
|
|
1160 |
changed at:(1 + end+1) put:true.
|
|
1161 |
end := end + 1.
|
|
1162 |
changed at:(1 + start+1) put:false.
|
|
1163 |
start := start + 1.
|
|
1164 |
" Since one line-that-matches is now before this run
|
|
1165 |
instead of after, we must advance in the other file
|
|
1166 |
to keep in synch."
|
|
1167 |
i := i + 1.
|
|
1168 |
j := j + 1.
|
|
1169 |
]
|
|
1170 |
ifFalse:[ bool2 := false ].
|
|
1171 |
].
|
|
1172 |
preceding := i.
|
|
1173 |
otherPreceding := j.
|
|
1174 |
].
|
|
1175 |
].
|
|
1176 |
! !
|
|
1177 |
|
|
1178 |
!Diff::ForwardScript methodsFor:'default'!
|
|
1179 |
|
|
1180 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
|
|
1181 |
"Scan the tables of which lines are inserted and deleted,
|
|
1182 |
producing an edit script in forward order."
|
|
1183 |
|
|
1184 |
|script i0 i1 line0 line1|
|
|
1185 |
script := nil.
|
|
1186 |
i0 := aLen0.
|
|
1187 |
i1 := aLen1.
|
|
1188 |
[i0 >= 0 or:[i1 >= 0]] whileTrue:
|
|
1189 |
[((aChanged0 at:i0 + 1) or:[aChanged1 at:i1 + 1])
|
|
1190 |
ifTrue:
|
|
1191 |
[line0 := i0.
|
|
1192 |
line1 := i1.
|
|
1193 |
"Find # lines changed here in each file."
|
|
1194 |
[aChanged0 at:i0 + 1] whileTrue:[i0 := i0 - 1].
|
|
1195 |
[aChanged1 at:i1 + 1] whileTrue:[i1 := i1 - 1].
|
|
1196 |
"Record this change."
|
|
1197 |
script := Diff::Change new
|
|
1198 |
newLine0:i0
|
|
1199 |
line1:i1
|
|
1200 |
deleted:line0 - i0
|
|
1201 |
inserted:line1 - i1
|
|
1202 |
next:script.].
|
|
1203 |
"We have reached lines in the two files that match each other."
|
|
1204 |
i0 := i0 - 1.
|
|
1205 |
i1 := i1 - 1.].
|
|
1206 |
^script.
|
|
1207 |
|
|
1208 |
"Modified: / 16-02-2010 / 22:49:18 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
1209 |
! !
|
|
1210 |
|
|
1211 |
!Diff::ReverseScript methodsFor:'default'!
|
|
1212 |
|
|
1213 |
buildScript:aChanged0 length0:aLen0 changed1:aChanged1 length1:aLen1
|
|
1214 |
"Scan the tables of which lines are inserted and deleted,
|
|
1215 |
producing an edit script in reverse order."
|
|
1216 |
|
|
1217 |
|script i0 i1 line0 line1|
|
|
1218 |
script := nil.
|
|
1219 |
i0 := 0.
|
|
1220 |
i1 := 0.
|
|
1221 |
[i0 < aLen0 or:[i1 < aLen1]] whileTrue:
|
|
1222 |
[((aChanged0 at:(1 + i0 + 1)) or:[aChanged1 at:(1 + i1 + 1)])
|
|
1223 |
ifTrue:
|
|
1224 |
[line0 := i0.
|
|
1225 |
line1 := i1.
|
|
1226 |
"Find # lines changed here in each file."
|
|
1227 |
[aChanged0 at:(1 + i0 + 1)] whileTrue:[i0 := i0 + 1].
|
|
1228 |
[aChanged1 at:(1 + i1 + 1)] whileTrue:[i1 := i1 + 1].
|
|
1229 |
"Record this change."
|
|
1230 |
script := Diff::Change new
|
|
1231 |
newLine0:line0
|
|
1232 |
line1:line1
|
|
1233 |
deleted:(i0 - line0)
|
|
1234 |
inserted:(i1 - line1)
|
|
1235 |
next:script.].
|
|
1236 |
"We have reached lines in the two files that match each other."
|
|
1237 |
i0 := i0 + 1.
|
|
1238 |
i1 := i1 + 1.].
|
|
1239 |
^script.
|
|
1240 |
|
|
1241 |
"Modified: / 12-02-2010 / 14:15:27 / Jan Vrany <jan.vrany@fit.cvut.cz>"
|
|
1242 |
! !
|
|
1243 |
|
|
1244 |
!Diff class methodsFor:'documentation'!
|
|
1245 |
|
|
1246 |
version_CVS
|
16884
|
1247 |
^ '$Header$'
|
10014
|
1248 |
!
|
|
1249 |
|
|
1250 |
version_SVN
|
16884
|
1251 |
^ '$Id$'
|
10014
|
1252 |
! !
|
12692
|
1253 |
|