author | Claus Gittinger <cg@exept.de> |
Thu, 27 Jun 2019 15:21:18 +0200 | |
changeset 24377 | 2fbcb2fa6c43 |
parent 22478 | e511b09f7a97 |
permissions | -rw-r--r-- |
22427 | 1 |
"{ Encoding: utf8 }" |
2 |
||
8148 | 3 |
" |
4 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
5 |
All Rights Reserved |
|
6 |
||
7 |
This software is furnished under a license and may be used |
|
8 |
only in accordance with the terms of that license and with the |
|
9 |
inclusion of the above copyright notice. This software may not |
|
10 |
be provided or otherwise made available to, or used by, any |
|
11 |
other person. No title to or ownership of the software is |
|
12 |
hereby transferred. |
|
13 |
" |
|
8081 | 14 |
"{ Package: 'stx:libbasic' }" |
15 |
||
16 |
"{ NameSpace: CharacterEncoderImplementations }" |
|
17 |
||
22478 | 18 |
"SingleByteEncoder is declared as a subclass of FixedBytesEncoder;
 it adds no instance variables, class variables or pool dictionaries of its own."
FixedBytesEncoder subclass:#SingleByteEncoder
    instanceVariableNames:''
    classVariableNames:''
    poolDictionaries:''
    category:'Collections-Text-Encodings'
|
23 |
! |
|
24 |
||
8148 | 25 |
!SingleByteEncoder class methodsFor:'documentation'! |
26 |
||
27 |
copyright |
|
28 |
" |
|
29 |
COPYRIGHT (c) 2004 by eXept Software AG |
|
30 |
All Rights Reserved |
|
31 |
||
32 |
This software is furnished under a license and may be used |
|
33 |
only in accordance with the terms of that license and with the |
|
34 |
inclusion of the above copyright notice. This software may not |
|
35 |
be provided or otherwise made available to, or used by, any |
|
36 |
other person. No title to or ownership of the software is |
|
37 |
hereby transferred. |
|
38 |
" |
|
8811 | 39 |
! |
40 |
||
41 |
documentation
"
    Common abstract superclass for all single-byte encodings,
    i.e. encodings in which every character is represented by exactly
    one byte (see #characterSize:, which always answers 1, and the
    class-side #maxCode, which answers 255).

    The string conversions (#decodeString: / #encodeString:) and the
    stream support (#encodeCharacter:on: / #readNextCharacterFrom:)
    are implemented here in terms of the single-code messages
    #decode: and #encode:.

    [author:]
        Claus Gittinger
"
|
8148 | 48 |
! ! |
8081 | 49 |
|
50 |
!SingleByteEncoder class methodsFor:'queries'! |
|
51 |
||
21307 | 52 |
isAbstract
    "Answer whether the receiver is an abstract class.
     The receiver is compared by identity against SingleByteEncoder itself,
     so true is answered here and false for every subclass.
     Abstract subclasses have to redefine this again."

    ^ CharacterEncoderImplementations::SingleByteEncoder == self
|
58 |
! |
|
59 |
||
8081 | 60 |
maxCode
    "answer 255 (16rFF), the largest value which fits into a single byte"

    ^ 16rFF
|
22478 | 62 |
! ! |
63 |
||
64 |
!SingleByteEncoder methodsFor:'encoding & decoding'! |
|
65 |
||
66 |
decodeString:anEncodedStringOrByteCollection
    "Answer a unicode string for the given string (or byte collection),
     whose elements are codes in my encoding.
     Every element is mapped individually through #decode:.
     The result starts out as a String; it is converted to a Unicode16String
     as soon as a decoded code above 16rFF has to be stored,
     and to a Unicode32String as soon as one above 16rFFFF has to be stored."

    |decodedString unicodeCode encodedCode neededBits bits size "{ Class:SmallInteger }"|

    size := anEncodedStringOrByteCollection size.
    decodedString := String new:size.
    bits := decodedString bitsPerCharacter.

    1 to:size do:[:index |
        encodedCode := (anEncodedStringOrByteCollection at:index) codePoint.
        unicodeCode := self decode:encodedCode.
        unicodeCode > 16rFF ifTrue:[
            "the code needs a wider string class; widen only if not already done"
            neededBits := unicodeCode > 16rFFFF ifTrue:[32] ifFalse:[16].
            bits < neededBits ifTrue:[
                decodedString := neededBits == 32
                                    ifTrue:[Unicode32String fromString:decodedString]
                                    ifFalse:[Unicode16String fromString:decodedString].
                bits := neededBits.
            ].
        ].
        decodedString at:index put:(Character value:unicodeCode).
    ].
    ^ decodedString

    "
     CharacterEncoderImplementations::ISO8859_1 decodeString:'hello'
    "

    "Created: / 16-01-2018 / 19:54:02 / stefan"
    "Modified (format): / 17-01-2018 / 14:15:00 / stefan"
|
8081 | 101 |
! |
102 |
||
22478 | 103 |
encodeString:aUnicodeString
    "Answer a string in my encoding for the given unicode string.
     The code point of every character is mapped individually through #encode:;
     the result is a String of the same size as the argument."

    |encodedString unicodeCode encodedCode stringSize "{ Class: SmallInteger }"|

    stringSize := aUnicodeString size.
    encodedString := String new:stringSize.

    1 to:stringSize do:[:index |
        unicodeCode := (aUnicodeString at:index) codePoint.
        encodedCode := self encode:unicodeCode.
        encodedString at:index put:(Character codePoint:encodedCode).
    ].
    ^ encodedString

    "Created: / 16-01-2018 / 19:53:33 / stefan"
    "Modified: / 17-01-2018 / 14:15:39 / stefan"
|
8081 | 119 |
! ! |
120 |
||
11997 | 121 |
!SingleByteEncoder methodsFor:'queries'! |
122 |
||
22427 | 123 |
characterSize:charOrCodePoint
    "Answer the number of bytes required to encode charOrCodePoint.
     The argument is not looked at; the answer is always 1."

    ^ 1

    "Created: / 15-06-2005 / 15:11:24 / janfrog"
    "Modified (format): / 16-01-2018 / 19:50:39 / stefan"
130 |
! ! |
|
131 |
||
132 |
!SingleByteEncoder methodsFor:'stream support'! |
|
133 |
||
134 |
encodeCharacter:aUnicodeCharacterOrCodePoint on:aStream
    "Encode the given unicode character (or code point) via #encode:
     and write the resulting code as a single byte onto aStream.
     Only that byte is handed to the stream; no intermediate string is created here."

    |encodedCode|

    encodedCode := self encode:(aUnicodeCharacterOrCodePoint codePoint).
    aStream nextPutByte:encodedCode.

    "
     CharacterEncoderImplementations::ISO8859_10 new encodeCharacter:260 on:Transcript
     CharacterEncoderImplementations::ISO8859_10 new encodeCharacter:$Ą on:Transcript
    "

    "Created: / 17-01-2018 / 15:07:39 / stefan"
|
146 |
! |
|
147 |
||
148 |
readNext:charactersToRead charactersFrom:stream
    "Fetch the next charactersToRead elements from stream (via #next:)
     and answer them decoded by #decodeString:."

    |encoded|

    encoded := stream next:charactersToRead.
    ^ self decodeString:encoded
|
150 |
! |
|
151 |
||
152 |
readNextCharacterFrom:aStream
    "Read one byte from aStream and answer the character it decodes to (via #decode:).
     Answer nil if the stream delivered nil instead of a byte."

    |byte|

    byte := aStream nextByte.
    byte isNil ifTrue:[^ nil].
    ^ Character codePoint:(self decode:byte)

    "Created: / 16-01-2018 / 20:05:20 / stefan"
    "Modified: / 17-01-2018 / 15:19:03 / stefan"
|
11997 | 163 |
! ! |
164 |
||
8081 | 165 |
!SingleByteEncoder class methodsFor:'documentation'! |
166 |
||
167 |
version
    "answer the version string of this class's source; the keyword inside the literal
     is presumably filled in by the source code management system - do not edit it by hand"

    ^ '$Header$'
12433
b8654ecbc50d
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11997
diff
changeset
|
169 |
! |
b8654ecbc50d
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11997
diff
changeset
|
170 |
|
b8654ecbc50d
added: #readNextInputCharacterFrom:
Claus Gittinger <cg@exept.de>
parents:
11997
diff
changeset
|
171 |
version_CVS
    "answer the CVS version string of this class's source; the keyword inside the literal
     is presumably filled in by the source code management system - do not edit it by hand"

    ^ '$Header$'
8081 | 173 |
! ! |
19826 | 174 |