Unicode32String.st
author Jan Vrany <jan.vrany@labware.com>
Wed, 30 Jun 2021 14:07:56 +0100
branchjv
changeset 5481 19d6355dc3e1
parent 4769 89914ccfcf7d
permissions -rw-r--r--
Cherry-picked `Unicode32String` from 48677b66883e: cherry-picked Unicode32String.st from 48677b66883e: * cb05c61f9204: #FEATURE by stefan, Stefan Vogel <sv@exept.de> * 5f6a992925c2: #DOCUMENTATION by stefan, Stefan Vogel <sv@exept.de> * 45176601c636: #BUGFIX by exept, Claus Gittinger <cg@exept.de> * d6f50be034db: #REFACTORING by stefan, Stefan Vogel <sv@exept.de> * ae8ed6040c96: #REFACTORING by stefan, Stefan Vogel <sv@exept.de>

"{ Encoding: utf8 }"

"
 COPYRIGHT (c) 2004 by eXept Software AG 
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

"Class definition: Unicode32String is created as a variableLongSubclass of
 FourByteString, without additional instance variables, class variables or
 pool dictionaries, and is filed under the category 'Collections-Text'."
FourByteString variableLongSubclass:#Unicode32String
	instanceVariableNames:''
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Text'
!

!Unicode32String class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2004 by eXept Software AG 
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.


"
!

documentation
"
    A 4-byte unicode string; the class is defined as a variableLongSubclass
    of FourByteString.

    In addition to the inherited protocol, this class provides:
        - reading an instance back from its stored representation
          (class-side readFrom:onError:)
        - conversion to Unicode16String, replacing characters with
          codePoints above 16rFFFF by a caller-supplied character
        - display/print support for streams (via nextPutAllUnicode:)
        - a storeString which is utf8-encoded whenever the receiver
          contains non-7bit-ascii characters

    Not yet fully finished - unicode support is still being implemented.
"
! !

!Unicode32String class methodsFor:'initialization'!

initialize
    "private - class initialization.
     Installs the value answered by 'Behavior flagLongs' as the flags of this class."

    |longsFlag|

    longsFlag := Behavior flagLongs.
    self flags:longsFlag

    "
     Unicode32String initialize
    "

    "Created: 30.6.1997 / 15:35:52 / cg"
    "Modified: 30.6.1997 / 15:39:21 / cg"
! !

!Unicode32String class methodsFor:'reading'!

readFrom:aStreamOrString onError:exceptionBlock
    "Read and answer the next string from aStreamOrString (a character stream
     or a string), skipping leading whitespace first.
     The characters read must have the format written by storeOn: / storeString.
     Answer the value of exceptionBlock, if no string can be read."

    "only Unicode32String itself reads the smalltalk string syntax here;
     this behavior is not to be inherited (not ok for subclasses; Symbol, for example),
     so any other receiver class is passed on to the superclass implementation"
    self == Unicode32String ifTrue:[
        ^ self readSmalltalkStringFrom:aStreamOrString onError:exceptionBlock
    ].
    ^ super readFrom:aStreamOrString onError:exceptionBlock

    "
        self readFrom:'abcäöü' storeString
        String readFrom:'abcäöü' storeString
    "
! !

!Unicode32String methodsFor:'conversion'!

asUnicode16StringReplaceInvalidWith:replacementCharacter
    "Return a new Unicode16String of the same size as the receiver.
     Characters with a codePoint up to and including 16rFFFF are copied as-is;
     every character with a codePoint above 16rFFFF (i.e. one that does not fit
     into a 16-bit element) is replaced by replacementCharacter.
     Can be used to convert from 32-bit strings to 16-bit strings.
     The receiver itself is not modified.
     The replacementCharacter is stored without any check; the caller
     is responsible for passing a character that can be stored in a Unicode16String."

    |newString sz "{ Class:SmallInteger }"|

    sz := self size.

    newString := Unicode16String new:sz.
    1 to:sz do:[:idx |
        |char|

        char := self at:idx.
        "/ keep characters which fit into 16 bits; substitute all others
        newString
            at:idx
            put:(char codePoint <= 16rFFFF
                    ifTrue:[char]
                    ifFalse:[replacementCharacter]).
    ].
    ^ newString

    "
        'hello' asUnicode32String asUnicode16StringReplaceInvalidWith:$?
    "

    "Created: / 28-05-2019 / 12:13:57 / Stefan Vogel"
!

asUnicode32String
    "as the receiver already is a unicode-32 string, return it.
     Notice: the receiver itself is answered - not a copy."

    ^ self
!

asUnicodeString
    "as the receiver already is a unicode string, return it.
     Notice: the receiver itself is answered - not a copy."

    ^ self
!

displayOn:aGCOrStream
    "Display the receiver on aGCOrStream.
     If the argument is a stream, the receiver's characters are written to it
     (via nextPutAllUnicode:), enclosed in single quotes.
     Otherwise, the request is passed on to the superclass implementation.
     You must use an ISO10646 unicode font to display this string."

    "the double meaning is a kludge: Dolphin and Squeak understand this message as
     printOn: a stream, whereas old ST80 understands it as draw-yourself on a GC"
    aGCOrStream isStream ifTrue:[
        aGCOrStream nextPut:$'.
        aGCOrStream nextPutAllUnicode:self.
        aGCOrStream nextPut:$'.
        ^ self
    ].
    ^ super displayOn:aGCOrStream

    "Modified: / 22-02-2017 / 16:56:32 / cg"
!

printOn:aStream
    "print the receiver on aStream. 
     The receiver is handed to the stream via nextPutAllUnicode:, 
     so aStream decides how to represent it, whether UTF-8, UTF-16, ...
     No quotes are added here (in contrast to displayOn: with a stream argument)."

    aStream nextPutAllUnicode:self

    "Modified (comment): / 27-07-2013 / 15:37:03 / cg"
! !

!Unicode32String methodsFor:'printing & storing'!

storeOn:aStream
    "put the storeString of myself on aStream.
     Convert it to something that can be written to a plain ASCII file.
     Use #unicodeStoreOn: if you want to use the result internally,
     especially when writing to an EncodedStream.
     The work is delegated to #storeUtf8On: 
     (NOTE(review): that method is not defined in this file - presumably it writes
      the same utf8-encoded format as #storeString returns; confirm)."
    
    self storeUtf8On:aStream.

    "
        String streamContents:[:s|
            'hello' asUnicode32String storeOn:s
        ].

        String streamContents:[:s|
            'hello -öäüßщ' asUnicode32String storeOn:s
        ].
    "

    "Modified: / 28-09-2011 / 16:18:43 / cg"
    "Modified (comment): / 15-10-2020 / 20:00:50 / Stefan Vogel"
!

storeString
    "Answer a string from which the receiver can be reconstructed,
     and which can be written to a plain ASCII file.
     A receiver consisting of 7-bit ascii characters only answers its basicStoreString;
     any other receiver answers the basicStoreString of its utf8-encoding,
     followed by ' utf8Decoded'.
     Use #unicodeStoreString if you want to use the result internally,
     especially when writing to an EncodedStream."

    "/ pure 7-bit ascii: no encoding is needed
    self containsNon7BitAscii ifFalse:[
        ^ self basicStoreString
    ].

    "/ otherwise store the utf8-encoded form, to be decoded again when read back
    ^ self utf8Encoded basicStoreString , ' utf8Decoded'

    "
        'hello' asUnicode32String storeString
        'hello -öäüß' storeString
        'hello öäüßщ' asUnicode32String storeString
        'hello öäüßщ' asUnicode32String unicodeStoreString
    "

    "Modified: / 15-10-2020 / 20:02:42 / Stefan Vogel"
! !

!Unicode32String methodsFor:'testing'!

isLiteral
    "return true, if the receiver can be used as a literal constant in ST syntax
     (i.e. can be used in constant arrays).
     Unconditionally true here."

    ^ true
!

isUnicode32String
    "true if this is a 4-byte unicode string.
     Unconditionally true here."

    ^ true
!

isUnicodeString
    "true if this is a 2- or 4-byte unicode string
     (i.e. not a single byte string).
     Unconditionally true here.
     Notice, that the name is misleading: 
     all strings use unicode encoding"

    ^ true

    "Modified (comment): / 21-11-2019 / 18:26:05 / Stefan Vogel"
! !

!Unicode32String class methodsFor:'documentation'!

version
    "answer the version header string of this class's source.
     The string is a version-control keyword placeholder
     (NOTE(review): presumably expanded by the source code management system - confirm)"

    ^ '$Header$'
!

version_CVS
    "answer the CVS version header string of this class's source.
     The string is a version-control keyword placeholder
     (NOTE(review): presumably expanded by CVS on checkout - confirm)"

    ^ '$Header$'
! !


"run the class-side initialization (which sets the class flags) as part of filing in this source"
Unicode32String initialize!