Refactor timeout scaling factor initialization jv
authorJan Vrany <jan.vrany@fit.cvut.cz>
Fri, 24 Aug 2018 11:23:42 +0100
branchjv
changeset 52967d1c7df7f90
parent 528 32e722badd92
child 530 d000086cab5e
Refactor timeout scaling factor initialization

In order to stabilize tests on (heavily loaded) CI infrastructure,
reasses scaling factor every three minutes. Also include (simple)
I/O benchmark to compensate for (overloaded) I/O subsystem of
CI slaves.

For details, see `Builder::TestReport::Result >> timeoutScale`
reports/Builder__TestReport.st
     1.1 --- a/reports/Builder__TestReport.st	Mon Jul 02 08:46:02 2018 +0200
     1.2 +++ b/reports/Builder__TestReport.st	Fri Aug 24 11:23:42 2018 +0100
     1.3 @@ -10,8 +10,9 @@
     1.4  !
     1.5  
     1.6  TestResult subclass:#Result
     1.7 -	instanceVariableNames:'format time npassed nfailed nerror nskipped collector'
     1.8 -	classVariableNames:'TimeoutScale'
     1.9 +	instanceVariableNames:'format time timeoutScale timeoutScaleReassesmentTime npassed
    1.10 +		nfailed nerror nskipped collector'
    1.11 +	classVariableNames:''
    1.12  	poolDictionaries:''
    1.13  	privateIn:TestReport
    1.14  !
    1.15 @@ -322,6 +323,51 @@
    1.16      ^nskipped
    1.17  
    1.18      "Created: / 31-01-2013 / 13:54:42 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.19 +!
    1.20 +
    1.21 +timeoutScale
    1.22 +    "Return a timeout scaling factor used to adjust test-case defined timeout.
    1.23 +     This is required in order to stabilize tests on different computers and setups.
    1.24 +     Slower machines may need higher timeout otherwise tests may fail."
    1.25 +
    1.26 +    "/ This scaling factor is computed automatically by running a (not so) simple 
    1.27 +    "/ benchmark. To make things more complicated, we have to re-asses the scaling
    1.28 +    "/ factor time to time as machine performance may vary depending on a load - this
    1.29 +    "/ happens specially on CI setups where CI slaves are virtualized and running on
    1.30 +    "/ heavily-loaded systems.
    1.31 +    "/ 
    1.32 +    "/ The benchmark itself consists of:
    1.33 +    "/ 
    1.34 +    "/    * CPU benchmark - to handle systems with slow CPUs such as some
    1.35 +    "/      low-end Celerons on RPi-kind of thing)
    1.36 +    "/    * IO benchmark - to handle the case of slow IO on overloaded host
    1.37 +    "/      running many tests in parallel.
    1.38 +    "/ 
    1.39 +
    1.40 +
    1.41 +    (timeoutScale isNil or: [timeoutScaleReassesmentTime < OperatingSystem getMillisecondTime ]) ifTrue:[ 
    1.42 +        | time1 files time2 scale1 scale2 |
    1.43 +
    1.44 +        "/ simple CPU benchmark
    1.45 +        time1 := Time millisecondsToRun:[3000 timesRepeat: [ 2000 factorial ]].
    1.46 +        scale1 := time1 / 2500"mean value of an i5 64bit".
    1.47 +
    1.48 +        files := (Smalltalk getPackageDirectoryForPackage: self class package) recursiveDirectoryContentsAsFilenames 
    1.49 +                    select: [:e | e isRegularFile and:[e suffix = 'st']].
    1.50 +        time2 := Time millisecondsToRun:[ 10 timesRepeat: [ files shuffled do:[:e|e contents ] ] ].
    1.51 +        scale2 := time2 / 2500"mean value of an 64bit linux ext4 on SATA SSD".
    1.52 +
    1.53 +        timeoutScale := (scale1 max: scale2) max: 1.
    1.54 +        timeoutScaleReassesmentTime := OperatingSystem getMillisecondTime + (1000*60*3) "/ reasses every three minutes
    1.55 +    ].
    1.56 +    ^ timeoutScale
    1.57 +
    1.58 +    "
    1.59 +    Builder::TestReport::Result new timeoutScale; timeoutScale
    1.60 +    "
    1.61 +
    1.62 +    "Created: / 24-08-2018 / 09:54:57 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.63 +    "Modified (comment): / 24-08-2018 / 11:20:15 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.64  ! !
    1.65  
    1.66  !TestReport::Result methodsFor:'adding'!
    1.67 @@ -386,15 +432,10 @@
    1.68      super initialize.
    1.69  
    1.70      npassed := nfailed := nerror := nskipped := 0.
    1.71 -    TimeoutScale isNil ifTrue:[ 
    1.72 -        | bench |
    1.73 -
    1.74 -        bench := Time millisecondsToRun:[3000 timesRepeat: [ 2000 factorial ]].
    1.75 -        TimeoutScale := (bench / 2500"mean value of an i5 32bit") max: 1.
    1.76 -    ].
    1.77 +    timeoutScaleReassesmentTime := 0
    1.78  
    1.79      "Created: / 31-01-2013 / 13:52:33 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.80 -    "Modified: / 14-11-2016 / 23:37:40 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.81 +    "Modified: / 24-08-2018 / 10:58:55 / Jan Vrany <jan.vrany@fit.cvut.cz>"
    1.82  ! !
    1.83  
    1.84  !TestReport::Result methodsFor:'running'!
    1.85 @@ -498,7 +539,7 @@
    1.86  
    1.87      Logger trace: 'Running %1>>%2' with: aTestCase nameForHDTestReport with: aTestCase selectorForHDTestReport.
    1.88      Transcript show:'F'.
    1.89 -    timeout := (timeoutBase * TimeoutScale) rounded. 
    1.90 +    timeout := (timeoutBase * self timeoutScale) rounded. 
    1.91      sema := Semaphore new.
    1.92      stime := OperatingSystem getMillisecondTime.
    1.93      log := false.
    1.94 @@ -520,8 +561,8 @@
    1.95  
    1.96      etime := OperatingSystem getMillisecondTime.
    1.97      timeouted ifTrue:[
    1.98 -        error := TimeoutError new messageText: ('Timed out (effective %1ms, base %2ms, scale %3)' bindWith: timeout with: timeoutBase with:TimeoutScale) .
    1.99 -	outcome isNil ifTrue:[ outcome := self createOutcome ].
   1.100 +        error := TimeoutError new messageText: ('Timed out (effective %1ms, base %2ms, scale %3)' bindWith: timeout with: timeoutBase with: timeoutScale asFloat) .
   1.101 +        outcome isNil ifTrue:[ outcome := self createOutcome ].
   1.102          outcome result: TestResult stateError.
   1.103          format 
   1.104              writeTestCase: aTestCase outcome: outcome 
   1.105 @@ -536,14 +577,14 @@
   1.106      Transcript cr.
   1.107  
   1.108      "Created: / 12-01-2012 / 17:42:12 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.109 -    "Modified: / 16-11-2016 / 23:26:07 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.110 +    "Modified: / 24-08-2018 / 11:19:24 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.111  !
   1.112  
   1.113  unixForkCase:aTestCase debugged: debugged timeout: timeoutBase
   1.114      | timeout pid status sema stime etime error stack suiteFailuresBefore suiteErrorsBefore log logPos |
   1.115  
   1.116      Transcript show:'forking...'.
   1.117 -    timeout := (timeoutBase * TimeoutScale) rounded.   
   1.118 +    timeout := (timeoutBase * self timeoutScale) rounded.   
   1.119      sema := Semaphore new.
   1.120      stime := OperatingSystem getMillisecondTime.
   1.121      log := false.
   1.122 @@ -593,7 +634,7 @@
   1.123                      [ log := true.
   1.124                      status isNil 
   1.125                          ifTrue:
   1.126 -                            [ error := TimeoutError new messageText: ('Timed out (effective %1ms, base %2ms, scale %3)' bindWith: timeout with: timeoutBase with:TimeoutScale) .
   1.127 +                            [ error := TimeoutError new messageText: ('Timed out (effective %1ms, base %2ms, scale %3)' bindWith: timeout with: timeoutBase with:timeoutScale asFloat) .
   1.128                              stack := 'Oops, timed out!! (timeout was ' , timeout printString , ' sec)'. ]
   1.129                          ifFalse:
   1.130                              [ status status == #signal 
   1.131 @@ -630,7 +671,7 @@
   1.132      Transcript cr.
   1.133  
   1.134      "Created: / 12-01-2012 / 17:43:07 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.135 -    "Modified: / 16-11-2016 / 23:26:36 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.136 +    "Modified: / 24-08-2018 / 11:19:44 / Jan Vrany <jan.vrany@fit.cvut.cz>"
   1.137  ! !
   1.138  
   1.139  !TestReport class methodsFor:'documentation'!