# HG changeset patch
# User Stefan Vogel
# Date 1516369445 -3600
# Node ID 5b8c1f5f8ffab725f7962d6ba947d57ceae0be3a
# Parent  b30058f26971005263e535a4116b4cea938192d8
#REFACTORING by stefan
class: CharacterEncoderImplementations::ISO10646_to_SGML
    class definition
    added:
        #characterSize:
        #readNextCharacterFrom:
    removed:
        #decode:
        #encode:
    comment/format in:
        #decodeString:
    changed:
        #encodeString:
Amended in review: #readNextCharacterFrom: now detects the '&#' escape prefix (the test was written for '#&').

diff -r b30058f26971 -r 5b8c1f5f8ffa CharacterEncoderImplementations__ISO10646_to_SGML.st
--- a/CharacterEncoderImplementations__ISO10646_to_SGML.st	Fri Jan 19 14:43:55 2018 +0100
+++ b/CharacterEncoderImplementations__ISO10646_to_SGML.st	Fri Jan 19 14:44:05 2018 +0100
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 2004 by eXept Software AG
               All Rights Reserved
@@ -13,7 +15,7 @@
 
 "{ NameSpace: CharacterEncoderImplementations }"
 
-TwoByteEncoder subclass:#ISO10646_to_SGML
+VariableBytesEncoder subclass:#ISO10646_to_SGML
     instanceVariableNames:''
     classVariableNames:''
     poolDictionaries:''
@@ -49,10 +51,6 @@
 
 !ISO10646_to_SGML methodsFor:'encoding & decoding'!
 
-decode:aCode
-    self shouldNotImplement "/ no single byte conversion possible
-!
-
 decodeString:aStringOrByteCollection
     "given a string in SGML encoding (i.e. with SGML escaped characters),
      return a new string containing the same characters, in 16bit (or more) encoding.
@@ -61,11 +59,11 @@
      This only handles up-to 30bit characters."
 
     |nBits ch
-     in out codePoint t|
+     in out codePoint|
 
     nBits := 8.
     in := aStringOrByteCollection readStream.
-    out := WriteStream on:(String new:10).
+    out := CharacterWriteStream on:(String new:10).
     [in atEnd] whileFalse:[
         ch := in next.
         ch == $& ifTrue:[
@@ -78,24 +76,7 @@
                     codePoint := (codePoint * 10) + ch digitValue.
                     in next.
                 ].
-                codePoint > 16rFF ifTrue:[
-                    codePoint > 16rFFFF ifTrue:[
-                        nBits < 32 ifTrue:[
-                            t := out contents.
-                            out := WriteStream on:(Unicode32String fromString:t).
-                            out position:t size.
-                            nBits := 32.
-                        ]
-                    ] ifFalse:[
-                        nBits < 16 ifTrue:[
-                            t := out contents.
-                            out := WriteStream on:(Unicode16String fromString:t).
-                            out position:t size.
-                            nBits := 16.
-                        ]
-                    ]
-                ].
-                out nextPut:(Character value:codePoint).
+                out nextPut:(Character codePoint:codePoint).
                 in peekOrNil == $; ifTrue:[
                     in next.
                 ]
@@ -115,10 +96,8 @@
      CharacterEncoderImplementations::ISO10646_to_SGML
         decodeString:'#197;&bn...'
     "
-!
 
-encode:aCode
-    self shouldNotImplement "/ no single byte conversion possible
+    "Modified: / 17-01-2018 / 18:35:52 / stefan"
 !
 
 encodeString:aUnicodeString
@@ -126,33 +105,87 @@
      The resulting string is only useful to be stored on some external file,
      not for being used inside ST/X."
 
-    |ch in out codePoint|
+    |in out|
 
     in := aUnicodeString readStream.
-    out := WriteStream on:(String new:10).
+    out := WriteStream on:(String new:aUnicodeString size + 10).
     [in atEnd] whileFalse:[
+        |ch codePoint|
+
         ch := in next.
         codePoint := ch codePoint.
         (codePoint between:16r20 and:16r7F) ifTrue:[
             out nextPut:ch.
         ] ifFalse:[
             out nextPutAll:'&#'.
-            out nextPutAll:(codePoint printString).
-            out nextPutAll:';'.
+            codePoint printOn:out.
+            out nextPut:$;.
         ].
     ].
     ^ out contents
 
     "
      CharacterEncoderImplementations::ISO10646_to_SGML
-        encodeString:'hello äöü'
+        encodeString:'hello äöü'
     "
 
     "Modified: / 23-10-2006 / 13:25:27 / cg"
+    "Modified (format): / 17-01-2018 / 18:41:16 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'queries'!
+
+characterSize:aCharacter
+    "answer the size of aCharacter's encoded form as produced by #encodeString:
+     1 if it is emitted verbatim (codePoint in 16r20..16r7F),
+     otherwise the length of the numeric reference '&#<decimal codePoint>;'"
+
+    |codePoint|
+
+    codePoint := aCharacter codePoint.
+    (codePoint between:16r20 and:16r7F) ifTrue:[
+        ^ 1.
+    ].
+    ^ codePoint printString size + 3   "&#1234;"
+
+    "Created: / 17-01-2018 / 18:01:40 / stefan"
+! !
+
+!ISO10646_to_SGML methodsFor:'stream support'!
+
+readNextCharacterFrom:aStream
+    "read the next element from aStream and answer the decoded character.
+     A numeric reference '&#<decimal digits>' with an optional terminating ';'
+     is consumed as a whole and answered as the character with that codePoint.
+     Anything else is answered unchanged."
+
+    |char codePoint|
+
+    char := aStream next.
+    "/ an escape is $& immediately followed by $# - everything else is literal
+    (char ~~ $& or:[aStream peekOrNil ~~ $#]) ifTrue:[
+        ^ char.
+    ].
+    aStream next.   "/ skip the $#
+
+    codePoint := 0.
+    [char := aStream peekOrNil.
+     char notNil and:[char isDigit]
+    ] whileTrue:[
+        codePoint := (codePoint * 10) + char digitValue.
+        aStream next.
+    ].
+    aStream peekOrNil == $; ifTrue:[
+        aStream next.
+    ].
+    ^ Character codePoint:codePoint.
+
+    "Created: / 17-01-2018 / 18:37:40 / stefan"
 ! !
 
 !ISO10646_to_SGML class methodsFor:'documentation'!
 
 version
-    ^ '$Header: /cvs/stx/stx/libbasic/CharacterEncoderImplementations__ISO10646_to_SGML.st,v 1.3 2006-10-23 11:25:11 cg Exp $'
+    ^ '$Header$'
 ! !
+