Base64Coder.st
author Claus Gittinger <cg@exept.de>
Tue, 18 Dec 2018 12:52:15 +0100
changeset 4777 b22943151ce0
parent 4754 95158fac7fde
child 4891 d8c52483ab4c
permissions -rw-r--r--
#DOCUMENTATION by cg class: ZipStream class comment/format in: #compress:into: #uncompress:into:

"{ Encoding: utf8 }"

"
 COPYRIGHT (c) 2002 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

BaseNCoder subclass:#Base64Coder
	instanceVariableNames:''
	classVariableNames:'Base64Mapping Base64ReverseMapping'
	poolDictionaries:''
	category:'System-Storage'
!

!Base64Coder class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2002 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"
    Instances of this class perform Base64 en- and decoding as defined in RFC 2045
    3 bytes are mapped to 4 characters, representing 6 bits each.
    The encoded string consists only of characters from the set:
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='

    The main entry point API is
        Base64Coder encode:aStringOrBytes
    and
        Base64Coder decode:aString

    If the decoder should return a string, use
        Base64Coder decodeAsString:aString.

    [author:]
        Stefan Vogel

    [see also:]
        RFC https://tools.ietf.org/html/rfc4648
        
    [instance variables:]

    [class variables:]
        Base64Mapping         String   Mapping from bytes (with 6 valid bits)
                                       to Base64 characters
        Base64ReverseMapping  Array    Mapping from Base64 characters to 6-bit-Bytes
"
!

examples
"
                                                                [exBegin]
   (Base64Coder encode:'queen%27s%20gambit') asString = 'cXVlZW4lMjdzJTIwZ2FtYml0'
                                                                [exEnd]

                                                                [exBegin]
   (Base64Coder decode:'cXVlZW4lMjdzJTIwZ2FtYml0') asString = 'queen%27s%20gambit'
                                                                [exEnd]

                                                                [exBegin]
   |data1 text data2|

   data1 := #[0 1 16r7F 16r80 16r81 16rFE 16rFF].
   text := Base64Coder encode:data1.
   data2 := Base64Coder decode:text.
   data2
                                                                [exEnd]

                                                                [exBegin]
   |coder|

   coder := Base64Coder on:'' writeStream.
   coder nextPutAll:#[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19].
   coder flush.
   coder contents inspect.
   coder reset.
   coder nextPut:254.
   coder contents inspect.
                                                                [exEnd]

                                                                [exBegin]
   |coder decoder|

   coder := Base64Coder on:'' writeStream.
   coder nextPutAll:#[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20].
   coder flush.

   decoder := Base64Coder on:(coder contents readStream).
   [decoder atEnd] whileFalse:[
      Transcript show:decoder next
   ].
   Transcript cr.
                                                                [exEnd]
                                                                [exBegin]
   |coder|

   coder := Base64Coder on:'' writeStream.
   coder nextPutAll:(0 to:200) asByteArray.
   coder flush.

   Transcript showCR:(coder contents).
                                                                [exEnd]
                                                                [exBegin]
   |bytes encoded decoded|

   bytes := #[0 0 0] copy.
   0 to:255 do:[:b1 |
       Transcript showCR:b1.  
       bytes at:1 put:b1.  
       0 to:255 do:[:b2 |
           bytes at:2 put:b2.  
           0 to:255 do:[:b3 |
               bytes at:3 put:b3.  
               encoded := Base64Coder encode:bytes.
               decoded := Base64Coder decode:encoded.
               self assert:(decoded = bytes).
           ]
       ]
   ].
                                                                [exEnd]
"
! !

!Base64Coder class methodsFor:'initialization'!

initializeMappings
    "initialize class variables"

    Base64Mapping isNil ifTrue:[
        "65 characters representing the 6-bit values from 0-63 and one pad character"
        Base64Mapping := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='.
        Base64ReverseMapping := self reverseMappingFor:Base64Mapping.
    ].

    "
     Base64Mapping := nil.
     self initializeMappings
    "

    "Modified (comment): / 30-09-2018 / 15:39:44 / Claus Gittinger"
!

mapping
    ^ Base64Mapping

    "Created: / 30-09-2018 / 15:30:33 / Claus Gittinger"
!

reverseMapping
    ^ Base64ReverseMapping

    "Created: / 30-09-2018 / 15:30:40 / Claus Gittinger"
! !

!Base64Coder class methodsFor:'decoding'!

decode:aStringOrStream
    "because base64 decoding is used heavily in some protocols,
     a specially tuned version is provided here for the common case of decoding a string"

    aStringOrStream isString ifTrue:[
        ^ self fastDecodeString:aStringOrStream
    ].    
    ^ super decode:aStringOrStream.

    "Created: / 30-09-2018 / 14:14:51 / Claus Gittinger"
    "Modified: / 30-09-2018 / 16:58:21 / Claus Gittinger"
!

fastDecodeString:aString
    "because base64 decoding is used heavily in some protocols,
     a specially tuned version is provided here for the common case of decoding a string"

    ^ self fastDecodeString:aString asString:false

    "
     (Base64Coder encode:'queen%27s%20gambit') asString => 'cXVlZW4lMjdzJTIwZ2FtYml0'

     (Base64Coder fastDecodeString:'cXVlZW4lMjdzJTIwZ2FtYml0') asString => 'queen%27s%20gambit'
    "

    "Created: / 30-09-2018 / 14:36:58 / Claus Gittinger"
!

fastDecodeString:aString asString:asStringBoolean
    "because base64 decoding is used heavily in some protocols,
     a specially tuned version is provided here for the common case of decoding a string"

    |decoding revMapping|

    revMapping := self reverseMapping.
    revMapping isNil ifTrue:[
        self initializeMappings.
        revMapping := self reverseMapping.
    ].    
%{
    // overallocate by 3
#   define N_QUICKBUFFER 512
    if (__isString(aString)
     && __isByteArray(revMapping)) {
        unsigned char *_revMapping = __stringVal(revMapping);
        int numInChars = __stringSize(aString);
        char *in = __stringVal(aString);
        unsigned char quickBuffer[N_QUICKBUFFER+3];
        unsigned char *buffer = quickBuffer;
        int bufferSize = N_QUICKBUFFER;
        int outLen = 0;
        int charBuffer = 0;
        int nBitsOut = 0;
        int i;

        for (i=0; i<numInChars; i++) {
            char ch = in[i];
            int bits = -1;

            if (ch <= 127) {
                bits = _revMapping[(ch-1) & 0x7F];
            }    

            if ((unsigned)bits <= 0x3F) { 
                charBuffer = (charBuffer << 6) | bits;
                nBitsOut += 6;
                if (nBitsOut == 24) {
                    if ((outLen + 3) > bufferSize) {
                        if (buffer == quickBuffer) {
                            // overallocate by 3
                            buffer = (unsigned char *)malloc(bufferSize*2+3);
                            memcpy(buffer, quickBuffer, bufferSize);
                        } else {
                            buffer = (unsigned char *)realloc(buffer, bufferSize*2+3);
                        }
                        bufferSize = bufferSize * 2;
                    }
                    buffer[outLen] = (charBuffer >> 16) & 0xFF;
                    buffer[outLen+1] = (charBuffer >> 8) & 0xFF;
                    buffer[outLen+2] = (charBuffer) & 0xFF;
                    outLen += 3;
                    charBuffer = nBitsOut = 0;
                }
            } else {
                if ((unsigned)bits == 0x40) {
                    // end mark
                    // because of overallocation, there is no need to check for buffer-full condition here
                    if (nBitsOut == 12) {
                        // data has been padded to 12, skip 4 bits
                        // one more byte coming
                        charBuffer >>= 4;
                        nBitsOut -= 4;
                        buffer[outLen] = (charBuffer) & 0xFF;
                        outLen += 1;
                    } else if (nBitsOut == 18) {
                        // data has been padded to 18, skip 2 bits
                        charBuffer >>= 2;
                        nBitsOut -= 2;
                        buffer[outLen] = (charBuffer >> 8) & 0xFF;
                        buffer[outLen+1] = (charBuffer) & 0xFF;
                        outLen += 2;
                    }
                } else {
                    // ignore
                }    
            }
        }

        if (asStringBoolean == true) {
            decoding = __MKSTRING_L(buffer, outLen);
        } else {
            decoding = __MKBYTEARRAY(buffer, outLen);
        }
        if (buffer != quickBuffer) {
            free(buffer);
        }
        RETURN(decoding);
    }    
%}.
    decoding := super decode:aString.
    asStringBoolean ifTrue:[
        ^ decoding asString
    ].    
    ^ decoding
    
    "
     (Base64Coder encode:'queen%27s%20gambit') => 'cXVlZW4lMjdzJTIwZ2FtYml0'

     (Base64Coder decode:'cXVlZW4lMjdzJTIwZ2FtYml0') asString => 'queen%27s%20gambit'
     (Base64Coder fastDecodeString:'cXVlZW4lMjdzJTIwZ2FtYml0') asString => 'queen%27s%20gambit'
     (Base64Coder fastDecodeString:'cXVlZW4lMjdzJTIwZ2FtYml0' asString:true) => 'queen%27s%20gambit'

     (Base64Coder encode:'a') => 'YQ=='
     (Base64Coder fastDecodeString:'YQ==' asString:true) => 'a'

     (Base64Coder encode:'aa') => 'YWE='
     (Base64Coder fastDecodeString:'YWE=' asString:true) => 'aa'

     |data encoded|
     data := ByteArray new:100000.
     encoded := Base64Coder encode:data.
     Time millisecondsToRun:[
        10 timesRepeat:[
            Base64Coder decode:encoded.
        ]
     ] 
     
     |data encoded|
     data := ByteArray new:100000.
     encoded := Base64Coder encode:data.
     Time millisecondsToRun:[
        10 timesRepeat:[
            Base64Coder fastDecodeString:encoded.
        ]
     ]

    "

    "Created: / 30-09-2018 / 14:35:05 / Claus Gittinger"
    "Modified (comment): / 30-09-2018 / 16:57:52 / Claus Gittinger"
! !

!Base64Coder methodsFor:'encoding'!

nextPutByte:aByte
    "encode aByte on the output stream"

    |b1 "{ Class: SmallInteger }"
     b2 "{ Class: SmallInteger }" 
     b3 "{ Class: SmallInteger }"
     b4 "{ Class: SmallInteger }" 
     bufferedBytes "{ Class: SmallInteger }" |

    buffer := (buffer bitShift:8) bitOr:aByte.
    bits := bits + 8.
    bits == 24 ifTrue:[
        "RFC 2045 says: max 76 characters in one line"
        (lineLimit notNil and:[charCount >= lineLimit]) ifTrue:[
            stream cr.
            charCount := 0.
        ].
        bufferedBytes := buffer.
        
        b4 := bufferedBytes bitAnd:16r3F.
        b3 := (bufferedBytes bitShift:-6)  bitAnd:16r3F.
        b2 := (bufferedBytes bitShift:-12) bitAnd:16r3F.
        b1 := (bufferedBytes bitShift:-18) bitAnd:16r3F.
        buffer := bits := 0.
        
        stream nextPut:(mapping at:b1+1);
               nextPut:(mapping at:b2+1);
               nextPut:(mapping at:b3+1);
               nextPut:(mapping at:b4+1).

        charCount := charCount + 4.
    ].

    "Modified: / 26-08-2017 / 12:35:17 / cg"
    "Modified: / 30-09-2018 / 15:15:14 / Claus Gittinger"
! !

!Base64Coder methodsFor:'misc'!

flush
    "flush the remaining bits of buffer. 
     The number of bits in buffer is not a multiple of 6, so we pad
     the buffer and signal that padding has been done via $= characters."

    |b1 b2 b3 b4|
        
    bits == 0 ifTrue:[
        "buffer is empty, nothing to do"
        ^ self.
    ].
            
    bits == 8 ifTrue:[
        buffer := buffer bitShift:4.
        b4 := b3 := 64. "pad with '=='"
        b1 := (buffer bitShift:-6) bitAnd:16r3F.
        b2 := buffer bitAnd:16r3F.
    ] ifFalse:[
        bits = 16 ifTrue:[
            buffer := buffer bitShift:2.
            b4 := 64.        "pad with '='"
            b3 := buffer bitAnd:16r3F.
            b2 := (buffer bitShift:-6)  bitAnd:16r3F.
            b1 := (buffer bitShift:-12) bitAnd:16r3F.
        ]
    ].
    bits := buffer := 0.

    "RFC 2045 says: max 76 characters in one line"
    (lineLimit notNil and:[charCount >= lineLimit]) ifTrue:[
        stream cr.
        charCount := 0.
    ].

    stream nextPut:(mapping at:b1+1);
           nextPut:(mapping at:b2+1);
           nextPut:(mapping at:b3+1);
           nextPut:(mapping at:b4+1).
    charCount := charCount + 4.

    "Modified: / 30-09-2018 / 15:15:52 / Claus Gittinger"
! !

!Base64Coder methodsFor:'private'!

fillBuffer
    "fill buffer with next 4 characters each representing 6 bits.
     Used when decoding."

    |b  
     tempBuffer "{Class: SmallInteger}"
     _bits      "{Class: SmallInteger}" |

    tempBuffer := 0.
    _bits := 0.

    [
        "read next valid Base64 character, skip invalid characters"
        b := 255.
        [b == 255] whileTrue:[
            b := stream next.
            b isNil ifTrue:[ "end of stream"
                b := 64.     "simulate end-mark"
            ] ifFalse:[
                b := reverseMapping at:b codePoint ifAbsent:255.
            ]
        ].

        b == 64 ifTrue:[
            "got $=, end of Base64 string has been reached"
            atEnd := true.
            _bits == 12 ifTrue:[
                "data has been padded to 12, skip 4 bits"
                tempBuffer := tempBuffer bitShift:-4.
                _bits := _bits - 4.
            ] ifFalse:[_bits == 18 ifTrue:[
                "data has been padded to 18, skip 2 bits"
                tempBuffer := tempBuffer bitShift:-2.
                _bits := _bits - 2.
            ]].
        ] ifFalse:[
            "got valid Base64 character, append to buffer"
            tempBuffer := (tempBuffer bitShift:6) bitOr:b.
            _bits := _bits + 6.
        ].
        (_bits == 24 or:[atEnd]) ifTrue:[
            bits := _bits.
            buffer := tempBuffer.
            ^ self.
        ].
    ] loop.

    "Modified: / 30-09-2018 / 15:16:19 / Claus Gittinger"
! !

!Base64Coder class methodsFor:'documentation'!

version
    ^ '$Header$'
!

version_CVS
    ^ '$Header$'
! !