CRC32Stream.st
author Claus Gittinger <cg@exept.de>
Sat, 02 May 2020 21:40:13 +0200
changeset 5476 7355a4b11cb6
parent 4917 985b36efa9e6
permissions -rw-r--r--
#FEATURE by cg class: Socket class added: #newTCPclientToHost:port:domain:domainOrder:withTimeout: changed: #newTCPclientToHost:port:domain:withTimeout:

"{ Encoding: utf8 }"

"
 COPYRIGHT (c) 2003 by eXept Software AG
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

CRCStream subclass:#CRC32Stream
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'System-Crypt-Hashing'
!

CRC32Stream subclass:#CRC32Stream_Castagnoli
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	privateIn:CRC32Stream
!

!CRC32Stream primitiveDefinitions!
%{
#if defined(__x86__) || defined(__x86_64__)

# if defined(__GNUC__)
#  define HAS_CRC
# endif

# if defined(__clang__)
#  define HAS_CRC
# endif

# if defined(__MINGW__) || defined(__GNUC__)
#  ifdef __LP64__
#   define uint64_t       __uint64__
#  endif
#  ifndef __CLANG__
    typedef unsigned int   uint32_t;
    typedef unsigned short uint16_t;
    typedef unsigned char  uint8_t;
#  endif
# endif

# ifdef HAS_CRC

#  if __POINTER_SIZE__ == 8
static inline __uint64__
__crc32_u64(__uint64__ crc, __uint64__ value) {
  asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
  return crc;
}
#  endif

static inline uint32_t
__crc32_u32(uint32_t crc, uint32_t value) {
  asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
  return crc;
}

static inline uint32_t
__crc32_u16(uint32_t crc, uint16_t value) {
  asm("crc32w %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
  return crc;
}

static inline uint32_t
__crc32_u8(uint32_t crc, uint8_t value) {
  asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
  return crc;
}

# endif // HAS_CRC
#endif // x86 or x86_64
%}
! !

!CRC32Stream class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2003 by eXept Software AG
	      All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Only use CRC to protect against communication errors;
    DO NOT use CRC for cryptography, authentication, security, etc.
    - use secure hashes for those instead.

    Standard CRC method as defined by ISO 3309 [ISO-3309] or ITU-T V.42 [ITU-T-V42].
    The default CRC polynomial employed is

        x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
        (or 16r04C11DB7)

    You can also create an instance performing the Castagnoli CRC-32C
    (used in iSCSI & SCTP [RFC3720], G.hn payload, SSE4.2):

        self newCrc32c

     the polynomial is: 16r1edc6f41
        = x32 + x28 + x27 + x26 + x25 + x23 + x22 + x20 + x19 + x18 + x14 + x13 + x11 + x10 + x9 + x8 + x6 + 1

    Notice that this CRC is also used with PNG images;
    therefore, its performance directly affects png image processing (png write speed).

    throughput:
        235 Mb/s on MacBook Pro (2.6Ghz I7) 
                (360 Mb/s for big chunks of size 50k)
                (100 Mb/s for small chunks of size 10)
        157 Mb/s on a 2.5Ghz 64X2 Athlon 4800+ (64bit)
        150 Mb/s on 2Ghz Duo
        
     new:
        500 Mb/s for castagnoli on MacBook Pro (2.6Ghz I7) 
                (5Gb/s for big chunks of size 50k)

    [author:]
        Stefan Vogel (stefan@zwerg)

    [instance variables:]

    [class variables:]

    [see also:]
        SHA1Stream
        MD5Stream

"
!

examples
"

  expect 60C1D0A0
								[exBegin]
    self information:(CRC32Stream hashValueOf:'resume') hexPrintString
								[exEnd]

  expect 16r60C1D0A0
								[exBegin]
    self information:(CRC32Stream new
			    nextPut:$r;
			    nextPut:$e;
			    nextPut:$s;
			    nextPut:$u;
			    nextPut:$m;
			    nextPut:$e;
			    hashValue) hexPrintString
								[exEnd]

  expect 16r70E46888:
								[exBegin]
    self information:(CRC32Stream hashValueOf:#[1 2 3 4 5 6 7]) hexPrintString
								[exEnd]

  expect 16r8CD04C73:
								[exBegin]
    self information:((CRC32Stream hashValueOf:#[16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF
	     16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF
	     16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF
	     16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF 16rFF]) hexPrintString)
								[exEnd]

  timing throughput:
  230Mb/s (on MacBook Pro 2012 / 2.6Ghz I7)
								[exBegin]
    |hashStream n t|

    hashStream := CRC32Stream new.
    n := 2000000.
    t := Time millisecondsToRun:[
	    n timesRepeat:[
		hashStream nextPutAll:'12345678901234567890123456789012345678901234567890'.
	    ].
	 ].
    t := (t / 1000) asFloat.
    Transcript show:'crc32:'; showCR: hashStream hashValue hexPrintString.
    Transcript show:t; show:' seconds for '; show:(50*n/1024/1024) asFloat; showCR:' Mb'.
    Transcript show:(n*50/1024/1024 / t); showCR:' Mb/s'
								[exEnd]

  500Mb/s (on MacBook Pro 2012 / 2.6Ghz I7)
								[exBegin]
    |hashStream s n t l|

    hashStream := CRC32Stream newCastagnoli.
    n := 2000000.
    s := '12345678901234567890123456789012345678901234567890'.
    l := s size.
    t := Time millisecondsToRun:[
	    n timesRepeat:[
		hashStream nextPutAll:'12345678901234567890123456789012345678901234567890'.
	    ].
	 ].
    t := (t / 1000) asFloat.
    Transcript show:'crc32:'; showCR: hashStream hashValue hexPrintString.
    Transcript show:t; show:' seconds for '; show:(l*n/1024/1024) asFloat; showCR:' Mb'.
    Transcript show:(n*l/1024/1024 / t); showCR:' Mb/s'
								[exEnd]
  the real speed is shown with longer inputs...
								[exBegin]
    |hashStream n t l s|

    hashStream := CRC32Stream newCastagnoli.
    n := 20000.
    s := '1234567890' ,* 10000.
    l := s size.
    t := Time millisecondsToRun:[
	    n timesRepeat:[
		hashStream nextPutAll:s
	    ].
	 ].
    t := (t / 1000) asFloat.
    Transcript show:'crc32:'; showCR: hashStream hashValue hexPrintString.
    Transcript show:t; show:' seconds for '; show:(l*n/1024/1024) asFloat; showCR:' Mb'.
    Transcript show:(n*l/1024/1024 / t); showCR:' Mb/s'
								[exEnd]

  test vectors from https://tools.ietf.org/html/rfc3720#page-217:

  expect 0
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:'') hexPrintString
								[exEnd]
  expect C1D04330
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:'a') hexPrintString
								[exEnd]
  expect E3069283
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:'123456789') hexPrintString
								[exEnd]
  expect 8A9136AA
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:(ByteArray new:32 withAll:0)) hexPrintString
								[exEnd]
  expect 62a8ab43
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:(ByteArray new:32 withAll:16rFF)) hexPrintString
								[exEnd]
  expect 46dd794e
								[exBegin]
    self information:(CRC32Stream newCrc32c hashValueOf:(0 to:31) asByteArray) hexPrintString
								[exEnd]

"
! !

!CRC32Stream class methodsFor:'instance creation'!

generatorPolynomMSB:anInteger
    "notice, in literature, the generator polynom is usually specified as an MSB number"

    ^ self generatorPolynom:(anInteger bitReversed32)

    "
       self assert:((self generatorPolynom:16r82F63B78)
				nextPut:'123456789';
				hashValue)    = 16rE3069283
    "

    "Created: / 16-03-2019 / 20:55:46 / Claus Gittinger"
!

generatorPolynomMSB:anInteger initValue:initValueArg
    "notice, in literature, the generator polynom is usually specified as an MSB number"

    ^ self generatorPolynom:(anInteger bitReversed32) initValue:initValueArg

    "Created: / 16-03-2019 / 21:11:31 / Claus Gittinger"
!

generatorPolynomMSB:anInteger initValue:initValueArg xorOut:xorOut
    "notice, in literature, the generator polynom is usually specified as an MSB number"

    ^ self generatorPolynom:(anInteger bitReversed32) initValue:initValueArg xorOut:xorOut

    "Created: / 16-03-2019 / 21:21:27 / Claus Gittinger"
!

new
    "return an instance of the ITU-T CRC-32"

    ^ self newCCITT

    "Created: / 16-03-2019 / 21:09:19 / Claus Gittinger"
    "Modified: / 16-03-2019 / 23:29:45 / Claus Gittinger"
    "Modified (comment): / 17-03-2019 / 14:34:47 / Claus Gittinger"
!

newCCITT
    "return an instance of the ITU-T CRC-32
	x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1"

    "/ 16r4C11DB7 bitReversed32 -> 16rEDB88320
    ^ self generatorPolynomMSB:16r4C11DB7
	   initValue:16rFFFFFFFF xorOut:16rFFFFFFFF

    "Created: / 16-03-2019 / 23:29:39 / Claus Gittinger"
    "Modified: / 17-03-2019 / 14:07:36 / Claus Gittinger"
!

newCastagnoli
    "return an instance of the Castagnoli CRC-32
	x32 + x28 + x27 + x26 + x25 + x23 + x22 + x20 + x19 + x18 + x14 + x13 + x11 + x10 + x9 + x8 + x6 + 1
     (used in iSCSI & SCTP, G.hn payload, SSE4.2)"

    |impl|

    impl := (self canUseFastCRC) ifTrue:[CRC32Stream_Castagnoli] ifFalse:[self].

    "/ 16r1edc6f41 bitReversed32 -> 16r82F63B78
    ^ impl generatorPolynomMSB:16r1edc6f41
	   initValue:16rFFFFFFFF xorOut:16rFFFFFFFF

    "
     Castagnoli crc:
       self assert:((self newCrc32c)
				nextPut:'123456789';
				hashValue) = 3808858755. '16rE3069283'

     default crc:
       self assert:((self new)
				nextPut:'123456789';
				hashValue) = 3421780262. '16rCBF43926'
    "

    "Created: / 17-03-2019 / 14:33:54 / Claus Gittinger"
    "Modified: / 17-03-2019 / 17:08:32 / Claus Gittinger"
!

newCrc32c
    "return an instance of the Castagnoli CRC-32"

    ^ self newCastagnoli

    "
     Castagnoli crc:
       self assert:((self newCrc32c)
				nextPut:'123456789';
				hashValue) = 3808858755. '16rE3069283'

     default crc:
       self assert:((self new)
				nextPut:'123456789';
				hashValue) = 3421780262. '16rCBF43926'
    "

    "Modified (comment): / 17-05-2012 / 12:48:53 / cg"
    "Modified: / 17-03-2019 / 14:34:26 / Claus Gittinger"
! !

!CRC32Stream class methodsFor:'private'!

canUseFastCRC
%{ /* NOCONTEXT */
#if defined(HAS_CRC)
# if defined(__x86__) || defined(__x86_64__)
#  if defined(__clang__) || defined(__GNUC__)
    RETURN (__cpu_hasSSE4_2 ? true : false);
#  endif
# endif
#endif
%}.
    ^ false

    "
     self canUseFastCRC
    "

    "Created: / 17-03-2019 / 16:45:42 / Claus Gittinger"
! !

!CRC32Stream class methodsFor:'queries'!

hashSize
    "return the size of the hashvalue returned by instances of this class (in bytes)"

    ^ 4

    "Created: / 24-03-2019 / 12:52:47 / Claus Gittinger"
! !

!CRC32Stream methodsFor:'initialization'!

generatorPolynom:anLSBInteger
    "set the generator polynom for this instance.
     set start and xorOut to 16rFFFFFFFF.
     Note: you have to set the bit-reversed value, so the LSB must be first"

    self generatorPolynom:anLSBInteger
	 initValue:16rFFFFFFFF xorOut:16rFFFFFFFF.

    "Modified: / 16-03-2019 / 21:23:25 / Claus Gittinger"
    "Modified (format): / 17-03-2019 / 14:02:38 / Claus Gittinger"
!

generatorPolynom:anLSBInteger initValue:initValueArg
    "set the generator polynom for this instance.
     set start to initValueArg and xorOut to 16rFFFFFFFF.
     Note: you have to set the bit-reversed value, so the LSB must be first"

    self generatorPolynom:anLSBInteger
	 initValue:initValueArg xorOut:16rFFFFFFFF

    "Created: / 16-03-2019 / 21:06:16 / Claus Gittinger"
    "Modified (format): / 17-03-2019 / 14:02:31 / Claus Gittinger"
! !

!CRC32Stream::CRC32Stream_Castagnoli class methodsFor:'documentation'!

documentation
"
    redefined to use the CPU's CRC instruction

    [author:]
	Claus Gittinger
"
! !

!CRC32Stream::CRC32Stream_Castagnoli methodsFor:'writing'!

nextPutBytes:count from:anObject startingAt:start
    "add the hash of anObject to the computed hash so far.
     This uses the x86/x86_64 crc instruction,
     and works only for the castagnoli polynom"

%{  
#if defined(HAS_CRC)

    int len, offs;

    // fetch first - check later
    len = __intVal(count);
    offs = __intVal(start) - 1;

    if (__bothSmallInteger(count, start)) {
        int objSize;
        unsigned char *extPtr;

        // the most common first
        if (__isStringLike(anObject)) {
            objSize = __stringSize(anObject);
            extPtr = (unsigned char *)__stringVal(anObject);
        } else if (__isByteArray(anObject)) {
            objSize = __byteArraySize(anObject);
            extPtr = (unsigned char *)__byteArrayVal(anObject);
        } else if (__isExternalBytesLike(anObject)) {
            OBJ sz = __externalBytesSize(anObject);

            extPtr = (unsigned char *)__externalBytesAddress(anObject);
            if (__isSmallInteger(sz)) {
                objSize = __intVal(sz);
            } else {
                objSize = 0; /* unknown */
            }
        } else {
            int nInstVars, nInstBytes;
            OBJ oClass = __Class(anObject);

            switch (__intVal(__ClassInstPtr(oClass)->c_flags) & ARRAYMASK) {
                case BYTEARRAY:
                case WORDARRAY:
                case LONGARRAY:
                case SWORDARRAY:
                case SLONGARRAY:
                case FLOATARRAY:
                case DOUBLEARRAY:
                    break;
                default:
                    goto bad;
            }
            nInstVars = __intVal(__ClassInstPtr(oClass)->c_ninstvars);
            nInstBytes = __OBJS2BYTES__(nInstVars);
            // nInstBytes is the number of bytes occupied by pointer instance variables
            // subtract from size and add to byte-pointer
            objSize = __qSize(anObject) - OHDR_SIZE - nInstBytes;
            extPtr = (unsigned char *)__byteArrayVal(anObject)+nInstBytes;
        }

        if ((offs >= 0) && (len >= 0) && (objSize >= (len + offs))) {
            unsigned int _crc;
            unsigned int *_crcTable = __integerArrayVal( __INST(crcTable) );
            unsigned char *cp = extPtr+offs;
            unsigned int n = len;

            _crc = (unsigned int) (__intVal( __INST(crc) ));
            if (!__isSmallInteger(__INST(crc))) {
                _crc = __unsignedLongIntVal( __INST(crc) );
            }

# if __POINTER_SIZE__ == 8
            if (((unsigned INT)cp & 7) == 0) {
                // longword aligned
                for ( ; n >= 32 ; n -= 32, cp += 32) {
                    unsigned INT w0 = ((unsigned INT *)cp)[0];
                    unsigned INT w1, w2, w3;

                    _crc = __crc32_u64(_crc, w0);
                    w1 = ((unsigned INT *)cp)[1];
                    _crc = __crc32_u64(_crc, w1);
                    w2 = ((unsigned INT *)cp)[2];
                    _crc = __crc32_u64(_crc, w2);
                    w3 = ((unsigned INT *)cp)[3];
                    _crc = __crc32_u64(_crc, w3);
                }
                for ( ; n >= 8 ; n -= 8, cp += 8) {
                    _crc = __crc32_u64(_crc, ((unsigned INT *)cp)[0]);
                }
            }
# endif // __POINTER_SIZE__ == 8
            if (((unsigned INT)cp & 3) == 0) {
                // word aligned
                for ( ; n >= 4 ; n -= 4, cp += 4) {
                    _crc = __crc32_u32(_crc, ((unsigned int *)cp)[0]);
                }
            }
            for ( ; n >= 4 ; n -= 4, cp += 4) {
                _crc = __crc32_u8(_crc, cp[0]);
                _crc = __crc32_u8(_crc, cp[1]);
                _crc = __crc32_u8(_crc, cp[2]);
                _crc = __crc32_u8(_crc, cp[3]);
            }
            while (n-- > 0) {
                unsigned char ch = *cp++;

                _crc = __crc32_u8(_crc, ch);
            }

            if ((__POINTER_SIZE__==8) || (_crc <= _MAX_INT)) {
                __INST(crc) = __MKSMALLINT(_crc);
            } else {
                // this code fails with gcc 4.7.2:
                // __INST(crc) = __MKUINT(_crc); __STORESELF(crc);
                OBJ temp = __MKUINT(_crc);
                __INST(crc) = temp; __STORESELF(crc);
            }

            RETURN (count);
        }
    }
bad: ;
#endif
%}.
    self primitiveFailed

    "Created: / 17-03-2019 / 16:44:27 / Claus Gittinger"
    "Modified: / 25-03-2019 / 11:18:11 / Claus Gittinger"
! !

!CRC32Stream class methodsFor:'documentation'!

version
    ^ '$Header$'
!

version_CVS
    ^ '$Header$'
! !