Depth24Image.st
changeset 7531 00b7a56ef891
parent 7530 5c500f48ef49
child 7535 d8d9480fdec2
--- a/Depth24Image.st	Wed Aug 31 12:15:23 2016 +0200
+++ b/Depth24Image.st	Wed Aug 31 13:46:44 2016 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1993 by Claus Gittinger
 	      All Rights Reserved
@@ -2646,6 +2648,149 @@
     "Modified: / 27.7.1998 / 20:03:02 / cg"
 !
 
+easy2RotateBitsInto:destinationImage angle:degrees
+    "tuned helper for rotation - does the actual pixel shuffling.
+     Copies the pixels of the receiver into destinationImage,
+     rotated by degrees clockwise.
+     Here, only 90, 180 and 270 degrees are implemented.
+     Hard angles are done in #hardRotate:.
+     The code here is a tuned version of the inherited one, for more performance.
+     If the inline C code cannot deal with its arguments (angle is not the
+     SmallInteger 90, 180 or 270, geometry is not given as SmallIntegers,
+     bits are not byteArray-like, or a buffer is too small for the geometry),
+     nothing is written and the inherited implementation does the job instead."
+
+    |srcBytesPerRow dstBytesPerRow
+     srcWidth srcHeight
+     dstWidth dstHeight
+     srcBytes dstBytes|
+
+    srcBytesPerRow := self bytesPerRow.
+    srcBytes := self bits.
+    srcWidth := width.
+    srcHeight := height.
+
+    dstBytesPerRow := destinationImage bytesPerRow.
+    dstBytes := destinationImage bits.
+    dstWidth := destinationImage width.
+    dstHeight := destinationImage height.
+%{
+    if (__bothSmallInteger(srcWidth, srcHeight)
+     && __bothSmallInteger(dstWidth, dstHeight)
+     && __bothSmallInteger(dstBytesPerRow, srcBytesPerRow)
+     && __isByteArrayLike(srcBytes)
+     && __isByteArrayLike(dstBytes)
+    ) {
+        int c_srcW = __intVal(srcWidth);
+        int c_srcH = __intVal(srcHeight);
+        int c_dstW = __intVal(dstWidth);
+        int c_dstH = __intVal(dstHeight);
+        int c_srcBytesPerRow = __intVal(srcBytesPerRow);
+        int c_dstBytesPerRow = __intVal(dstBytesPerRow);
+
+        // Destination addressing, as byte offsets into dstBytes:
+        //   c_dstStart         - where the first source pixel (col 0, row 0) goes
+        //   c_dstNextColOffset - step from one pixel of a source row to the next
+        //   c_dstNextRowOffset - step from one source row to the next
+        // The deltas are precomputed, so the copy loop does not have to
+        // compute destination coordinates for each pixel.
+        int c_dstStart = 0;
+        int c_dstNextColOffset = 0;
+        int c_dstNextRowOffset = 0;
+
+        // Extent of the destination which is touched by the rotated source;
+        // 90 and 270 degrees swap width and height, 180 degrees does not.
+        int c_dstColsNeeded = c_srcH;
+        int c_dstRowsNeeded = c_srcW;
+
+        int c_angleIsEasy = 1;
+
+        if (degrees == __mkSmallInteger(90)) {
+            // destinationImage pixelAtX:(h-row) y:col put:pixel
+            c_dstNextRowOffset = -3;                                // going to previous column
+            c_dstNextColOffset = c_dstBytesPerRow;                  // going to next row
+            c_dstStart = (c_dstW-1)*3;                              // start in the upper-right of dest
+        } else if (degrees == __mkSmallInteger(180)) {
+            // destinationImage pixelAtX:(w-col) y:(h-row) put:pixel
+            c_dstNextRowOffset = -c_dstBytesPerRow;                 // going to previous row
+            c_dstNextColOffset = -3;                                // going to prev col
+            c_dstStart = (c_dstH-1)*c_dstBytesPerRow+(c_dstW-1)*3;  // start in the lower-right of dest
+            c_dstColsNeeded = c_srcW;
+            c_dstRowsNeeded = c_srcH;
+        } else if (degrees == __mkSmallInteger(270)) {
+            // destinationImage pixelAtX:row y:(w-col) put:pixel
+            c_dstNextRowOffset = 3;                                 // going to next col
+            c_dstNextColOffset = -c_dstBytesPerRow;                 // going to prev row
+            c_dstStart = (c_dstH-1)*c_dstBytesPerRow;               // start in the lower-left of dest
+        } else {
+            // Not one of the easy angles (or not given as SmallInteger).
+            // Leave it to the inherited code, instead of silently
+            // treating every other value like 270 degrees.
+            c_angleIsEasy = 0;
+        }
+
+        // Run the raw byte loop only if every access stays inside both buffers:
+        //   - there is at least one pixel to move,
+        //   - the destination is large enough for the rotated source,
+        //   - a row holds at least width*3 bytes (3 bytes per pixel),
+        //   - each byteArray holds at least bytesPerRow*height bytes.
+        // Formerly, only the source size was verified, so a destination with
+        // unexpected geometry could lead to writes outside of dstBytes.
+        // NOTE(review): sizes are still computed as C ints; images with more
+        // bytes than an int can hold are assumed not to occur here - confirm.
+        if (c_angleIsEasy
+         && (c_srcW > 0) && (c_srcH > 0)
+         && (c_dstColsNeeded <= c_dstW)
+         && (c_dstRowsNeeded <= c_dstH)
+         && ((c_srcW * 3) <= c_srcBytesPerRow)
+         && ((c_dstW * 3) <= c_dstBytesPerRow)
+         && ((c_srcBytesPerRow * c_srcH) <= __byteArraySize(srcBytes))
+         && ((c_dstBytesPerRow * c_dstH) <= __byteArraySize(dstBytes))
+        ) {
+            unsigned char *c_srcBytes = (unsigned char*)__ByteArrayInstPtr(srcBytes)->ba_element;
+            unsigned char *c_dstBytes = (unsigned char*)__ByteArrayInstPtr(dstBytes)->ba_element;
+            int c_srcRowStart = 0;
+            int c_dstRowStart = c_dstStart;
+            int c_y;
+
+            // Pixels are moved byte by byte, which is independent of the
+            // byte order and alignment requirements of the CPU.
+            // The former unrolled variant fetched 4 pixels with three int loads
+            // and took them apart by shifting; that delivers the bytes in the
+            // wrong order on MSB-first machines and fetches from unaligned
+            // addresses whenever a row does not start on an int boundary.
+            // According to the measurements of the original author, the loop is
+            // limited by memory bandwidth anyway, so little should be lost here.
+            // Indices are used instead of running pointers, so that no pointer
+            // outside of the buffers is ever formed by a negative step.
+            for (c_y=0; c_y<c_srcH; c_y++) {
+                int c_srcIdx = c_srcRowStart;
+                int c_dstIdx = c_dstRowStart;
+                int c_x;
+
+                for (c_x=0; c_x<c_srcW; c_x++) {
+                    c_dstBytes[c_dstIdx]   = c_srcBytes[c_srcIdx];
+                    c_dstBytes[c_dstIdx+1] = c_srcBytes[c_srcIdx+1];
+                    c_dstBytes[c_dstIdx+2] = c_srcBytes[c_srcIdx+2];
+
+                    c_srcIdx += 3;
+                    c_dstIdx += c_dstNextColOffset;
+                }
+                c_srcRowStart += c_srcBytesPerRow;
+                c_dstRowStart += c_dstNextRowOffset;
+            }
+            RETURN(self);
+        }
+    }
+%}.
+    "the inline C code could not handle the arguments (see above);
+     use the slower, generic implementation"
+    super easy2RotateBitsInto:destinationImage angle:degrees
+
+    "Modified: 11.7.1996 / 20:06:47 / cg"
+    "Created: 11.7.1996 / 20:08:11 / cg"
+!
+
 valuesAtY:y from:xLow to:xHigh do:aBlock
     "perform aBlock for each pixelValue from x1 to x2 in row y.
      The block is passed the pixelValue at each pixel.