--- a/Character.st Sat Aug 15 18:09:26 2015 +0200
+++ b/Character.st Sat Aug 15 18:44:41 2015 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
"
COPYRIGHT (c) 1988 by Claus Gittinger
All Rights Reserved
@@ -511,11 +513,16 @@
- this should only be used for emergency evaluators and the like."
%{ /* NOCONTEXT */
+# include <errno.h>
+
int c;
- c = getchar();
- if (c < 0) {
- RETURN (nil);
+ for (;;) {
+ c = getchar();
+ if (c >= 0) break;
+ if (errno != EINTR) {
+ RETURN (nil);
+ }
}
RETURN ( __MKCHARACTER(c & 0xFF) );
%}.
@@ -594,7 +601,6 @@
"
! !
-
!Character methodsFor:'Compatibility-Dolphin'!
isAlphaNumeric
@@ -838,16 +844,16 @@
"return a character with same letter as the receiver, but in lowercase.
Returns the receiver if it is already lowercase or if there is no lowercase equivalent.
CAVEAT:
- for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
- (which is more than mozilla does, btw. ;-)"
+ for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
+ (which is more than mozilla does, btw. ;-)"
%{
#ifdef __SCHTEAM__
{
- char ch = self.charValue("[asLowercase]");
-
- ch = java.lang.Character.toLowerCase(ch);
- return context._RETURN(STCharacter._new(ch));
+ char ch = self.charValue("[asLowercase]");
+
+ ch = java.lang.Character.toLowerCase(ch);
+ return context._RETURN(STCharacter._new(ch));
}
/* NOTREACHED */
#else
@@ -985,29 +991,41 @@
REGISTER int * __p;
__codePoint = __intVal(__INST(asciivalue));
+
+ // common ascii stuff first
+ if (__codePoint < 0x80) {
+ if ((__codePoint >= 'A') && (__codePoint <= 'Z')) {
+ unsigned newCodePoint;
+
+ newCodePoint = __codePoint - 'A' + 'a';
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ RETURN (self);
+ }
+
for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
- unsigned rangeStart, rangeSize, rangeEnd, mod;
-
- rangeStart = (unsigned)__p[0];
- if (__codePoint < rangeStart) break;
-
- rangeSize = ((unsigned)__p[1]) >> 8;
- rangeEnd = rangeStart + rangeSize;
- if (__codePoint <= rangeEnd) {
- mod = __p[1] & 0xFF;
- if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
- OBJ newChar;
- unsigned newCodePoint;
-
- newCodePoint = __codePoint + __p[2];
- if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
- RETURN (__MKCHARACTER(newCodePoint)) ;
- }
- newChar = __MKUCHARACTER(newCodePoint) ;
- if (newChar == nil) goto allocationError;
- RETURN (newChar) ;
- }
- }
+ unsigned rangeStart, rangeSize, rangeEnd, mod;
+
+ rangeStart = (unsigned)__p[0];
+ if (__codePoint < rangeStart) break;
+
+ rangeSize = ((unsigned)__p[1]) >> 8;
+ rangeEnd = rangeStart + rangeSize;
+ if (__codePoint <= rangeEnd) {
+ mod = __p[1] & 0xFF;
+ if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
+ OBJ newChar;
+ unsigned newCodePoint;
+
+ newCodePoint = __codePoint + __p[2];
+ if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ newChar = __MKUCHARACTER(newCodePoint) ;
+ if (newChar == nil) goto allocationError;
+ RETURN (newChar) ;
+ }
+ }
}
RETURN (self);
allocationError: ;
@@ -1106,28 +1124,39 @@
REGISTER unsigned short *__p;
__codePoint = __intVal(__INST(asciivalue));
- for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
- if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
- short newCodePoint;
- OBJ newChar;
-
- newCodePoint = __p[1];
- if (newCodePoint == __codePoint) {
- RETURN (self);
- }
- if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
- RETURN (__MKCHARACTER(newCodePoint)) ;
- }
- newChar = __MKUCHARACTER(newCodePoint) ;
- if (newChar == nil) goto getOutOfHere;
- RETURN (newChar) ;
- }
+ if ((__codePoint > 0x01C0) && (__codePoint < 0x01FF)) {
+ for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
+ if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
+ short newCodePoint;
+ OBJ newChar;
+
+ newCodePoint = __p[1];
+ if (newCodePoint == __codePoint) {
+ RETURN (self);
+ }
+ if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ newChar = __MKUCHARACTER(newCodePoint) ;
+ if (newChar == nil) goto getOutOfHere;
+ RETURN (newChar) ;
+ }
+ }
}
+ if (__codePoint < 0x80) {
+ // do it here for common ascii characters
+ if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
+ unsigned char newCodePoint = __codePoint - 'a' + 'A';
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ RETURN (self) ;
+ }
+
ch = self;
getOutOfHere: ;
%}.
ch notNil ifTrue:[
- ^ ch asUppercase.
+ ^ ch asUppercase.
].
^ ObjectMemory allocationFailureSignal raise.
@@ -1136,6 +1165,7 @@
$A asTitlecase
$a asTitlecase
(Character value:16r01F1) asTitlecase
+ (Character value:16r01F2) asTitlecase
"
!
@@ -1153,16 +1183,16 @@
"return a character with same letter as the receiver, but in uppercase.
Returns the receiver if it is already uppercase or if there is no uppercase equivalent.
CAVEAT:
- for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
- (which is more than mozilla does, btw. ;-)"
+ for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
+ (which is more than mozilla does, btw. ;-)"
%{
#ifdef __SCHTEAM__
{
- char ch = self.charValue("[asUppercase]");
-
- ch = java.lang.Character.toUpperCase(ch);
- return context._RETURN(STCharacter._new(ch));
+ char ch = self.charValue("[asUppercase]");
+
+ ch = java.lang.Character.toUpperCase(ch);
+ return context._RETURN(STCharacter._new(ch));
}
/* NOTREACHED */
#else
@@ -1312,29 +1342,41 @@
REGISTER int *__p;
__codePoint = __intVal(__INST(asciivalue));
+
+ // common ascii stuff first
+ if (__codePoint < 0x80) {
+ if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
+ unsigned newCodePoint;
+
+ newCodePoint = __codePoint - 'a' + 'A';
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ RETURN (self);
+ }
+
for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
- unsigned rangeStart, rangeSize, rangeEnd, mod;
-
- rangeStart = (unsigned)__p[0];
- if (rangeStart > __codePoint) break;
-
- rangeSize = ((unsigned)__p[1]) >> 8;
- rangeEnd = rangeStart + rangeSize;
- if (__codePoint <= rangeEnd) {
- mod = __p[1] & 0xFF;
- if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
- OBJ newChar;
- unsigned newCodePoint;
-
- newCodePoint = __codePoint + __p[2];
- if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
- RETURN (__MKCHARACTER(newCodePoint)) ;
- }
- newChar = __MKUCHARACTER(newCodePoint) ;
- if (newChar == nil) goto allocationError;
- RETURN (newChar) ;
- }
- }
+ unsigned rangeStart, rangeSize, rangeEnd, mod;
+
+ rangeStart = (unsigned)__p[0];
+ if (rangeStart > __codePoint) break;
+
+ rangeSize = ((unsigned)__p[1]) >> 8;
+ rangeEnd = rangeStart + rangeSize;
+ if (__codePoint <= rangeEnd) {
+ mod = __p[1] & 0xFF;
+ if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
+ OBJ newChar;
+ unsigned newCodePoint;
+
+ newCodePoint = __codePoint + __p[2];
+ if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+ RETURN (__MKCHARACTER(newCodePoint)) ;
+ }
+ newChar = __MKUCHARACTER(newCodePoint) ;
+ if (newChar == nil) goto allocationError;
+ RETURN (newChar) ;
+ }
+ }
}
RETURN (self);
allocationError: ;
@@ -1457,7 +1499,7 @@
^ s contents
"
- 'ä' utf8Encoded
+ 'ä' utf8Encoded
"
! !
@@ -2523,9 +2565,9 @@
"
$e asNonDiacritical
- $é asNonDiacritical
- $ä asNonDiacritical
- $å asNonDiacritical
+ $é asNonDiacritical
+ $ä asNonDiacritical
+ $Ã¥ asNonDiacritical
"
!
@@ -3060,3 +3102,4 @@
version_CVS
^ '$Header$'
! !
+