class: Character
author Claus Gittinger <cg@exept.de>
Sat, 15 Aug 2015 18:44:41 +0200
changeset 18691 0b9e9bc60d61
parent 18690 f0a013daec58
child 18692 442b51ab0e41
child 18694 7927bd9d38b9
class: Character changed: #asLowercase #asTitlecase #asUppercase
Character.st
--- a/Character.st	Sat Aug 15 18:09:26 2015 +0200
+++ b/Character.st	Sat Aug 15 18:44:41 2015 +0200
@@ -1,3 +1,5 @@
+"{ Encoding: utf8 }"
+
 "
  COPYRIGHT (c) 1988 by Claus Gittinger
 	      All Rights Reserved
@@ -511,11 +513,16 @@
      - this should only be used for emergency evaluators and the like."
 
 %{  /* NOCONTEXT */
+#   include <errno.h>
+
     int c;
 
-    c = getchar();
-    if (c < 0) {
-	RETURN (nil);
+    for (;;) {
+        c = getchar();
+        if (c >= 0) break;
+	if (errno != EINTR) {	
+	    RETURN (nil);
+	}
     }
     RETURN ( __MKCHARACTER(c & 0xFF) );
 %}.
@@ -594,7 +601,6 @@
     "
 ! !
 
-
 !Character methodsFor:'Compatibility-Dolphin'!
 
 isAlphaNumeric
@@ -838,16 +844,16 @@
     "return a character with same letter as the receiver, but in lowercase.
      Returns the receiver if it is already lowercase or if there is no lowercase equivalent.
      CAVEAT:
-	for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
-	(which is more than mozilla does, btw. ;-)"
+        for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
+        (which is more than mozilla does, btw. ;-)"
 
 %{
 #ifdef __SCHTEAM__
     {
-	char ch = self.charValue("[asLowercase]");
-
-	ch = java.lang.Character.toLowerCase(ch);
-	return context._RETURN(STCharacter._new(ch));
+        char ch = self.charValue("[asLowercase]");
+
+        ch = java.lang.Character.toLowerCase(ch);
+        return context._RETURN(STCharacter._new(ch));
     }
     /* NOTREACHED */
 #else
@@ -985,29 +991,41 @@
     REGISTER int *  __p;
 
     __codePoint = __intVal(__INST(asciivalue));
+
+    // common ascii stuff first
+    if (__codePoint < 0x80) {
+        if ((__codePoint >= 'A') && (__codePoint <= 'Z')) {
+            unsigned newCodePoint;
+
+            newCodePoint = __codePoint - 'A' + 'a';
+            RETURN (__MKCHARACTER(newCodePoint)) ;
+        }
+        RETURN (self);
+    }
+
     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
-	unsigned rangeStart, rangeSize, rangeEnd, mod;
-
-	rangeStart = (unsigned)__p[0];
-	if (__codePoint < rangeStart) break;
-
-	rangeSize = ((unsigned)__p[1]) >> 8;
-	rangeEnd = rangeStart + rangeSize;
-	if (__codePoint <= rangeEnd) {
-	    mod = __p[1] & 0xFF;
-	    if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
-		OBJ newChar;
-		unsigned newCodePoint;
-
-		newCodePoint = __codePoint + __p[2];
-		if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
-		    RETURN (__MKCHARACTER(newCodePoint)) ;
-		}
-		newChar = __MKUCHARACTER(newCodePoint) ;
-		if (newChar == nil) goto allocationError;
-		RETURN (newChar) ;
-	    }
-	}
+        unsigned rangeStart, rangeSize, rangeEnd, mod;
+
+        rangeStart = (unsigned)__p[0];
+        if (__codePoint < rangeStart) break;
+
+        rangeSize = ((unsigned)__p[1]) >> 8;
+        rangeEnd = rangeStart + rangeSize;
+        if (__codePoint <= rangeEnd) {
+            mod = __p[1] & 0xFF;
+            if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
+                OBJ newChar;
+                unsigned newCodePoint;
+
+                newCodePoint = __codePoint + __p[2];
+                if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+                    RETURN (__MKCHARACTER(newCodePoint)) ;
+                }
+                newChar = __MKUCHARACTER(newCodePoint) ;
+                if (newChar == nil) goto allocationError;
+                RETURN (newChar) ;
+            }
+        }
     }
     RETURN (self);
 allocationError: ;
@@ -1106,28 +1124,39 @@
     REGISTER unsigned short *__p;
 
     __codePoint = __intVal(__INST(asciivalue));
-    for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
-	if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
-	    short newCodePoint;
-	    OBJ newChar;
-
-	    newCodePoint = __p[1];
-	    if (newCodePoint == __codePoint) {
-		RETURN (self);
-	    }
-	    if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
-		RETURN (__MKCHARACTER(newCodePoint)) ;
-	    }
-	    newChar = __MKUCHARACTER(newCodePoint) ;
-	    if (newChar == nil) goto getOutOfHere;
-	    RETURN (newChar) ;
-	}
+    if ((__codePoint > 0x01C0) && (__codePoint < 0x01FF)) {
+        for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
+            if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
+                short newCodePoint;
+                OBJ newChar;
+
+                newCodePoint = __p[1];
+                if (newCodePoint == __codePoint) {
+                    RETURN (self);
+                }
+                if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+                    RETURN (__MKCHARACTER(newCodePoint)) ;
+                }
+                newChar = __MKUCHARACTER(newCodePoint) ;
+                if (newChar == nil) goto getOutOfHere;
+                RETURN (newChar) ;
+            }
+        }
     }
+    if (__codePoint < 0x80) {
+        // do it here for common ascii characters
+        if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
+            unsigned char newCodePoint = __codePoint - 'a' + 'A';
+            RETURN (__MKCHARACTER(newCodePoint)) ;
+        }
+        RETURN (self) ;
+    }
+        
     ch = self;
 getOutOfHere: ;
 %}.
     ch notNil ifTrue:[
-	^ ch asUppercase.
+        ^ ch asUppercase.
     ].
 
     ^ ObjectMemory allocationFailureSignal raise.
@@ -1136,6 +1165,7 @@
      $A asTitlecase
      $a asTitlecase
      (Character value:16r01F1) asTitlecase
+     (Character value:16r01F2) asTitlecase
     "
 !
 
@@ -1153,16 +1183,16 @@
     "return a character with same letter as the receiver, but in uppercase.
      Returns the receiver if it is already uppercase or if there is no uppercase equivalent.
      CAVEAT:
-	for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
-	(which is more than mozilla does, btw. ;-)"
+        for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
+        (which is more than mozilla does, btw. ;-)"
 
 %{
 #ifdef __SCHTEAM__
     {
-	char ch = self.charValue("[asUppercase]");
-
-	ch = java.lang.Character.toUpperCase(ch);
-	return context._RETURN(STCharacter._new(ch));
+        char ch = self.charValue("[asUppercase]");
+
+        ch = java.lang.Character.toUpperCase(ch);
+        return context._RETURN(STCharacter._new(ch));
     }
     /* NOTREACHED */
 #else
@@ -1312,29 +1342,41 @@
     REGISTER int *__p;
 
     __codePoint = __intVal(__INST(asciivalue));
+ 
+   // common ascii stuff first
+    if (__codePoint < 0x80) {
+        if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
+            unsigned newCodePoint;
+
+            newCodePoint = __codePoint - 'a' + 'A';
+            RETURN (__MKCHARACTER(newCodePoint)) ;
+        }
+        RETURN (self);
+    }
+
     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
-	unsigned rangeStart, rangeSize, rangeEnd, mod;
-
-	rangeStart = (unsigned)__p[0];
-	if (rangeStart > __codePoint) break;
-
-	rangeSize = ((unsigned)__p[1]) >> 8;
-	rangeEnd = rangeStart + rangeSize;
-	if (__codePoint <= rangeEnd) {
-	    mod = __p[1] & 0xFF;
-	    if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
-		OBJ newChar;
-		unsigned newCodePoint;
-
-		newCodePoint = __codePoint + __p[2];
-		if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
-		    RETURN (__MKCHARACTER(newCodePoint)) ;
-		}
-		newChar = __MKUCHARACTER(newCodePoint) ;
-		if (newChar == nil) goto allocationError;
-		RETURN (newChar) ;
-	    }
-	}
+        unsigned rangeStart, rangeSize, rangeEnd, mod;
+
+        rangeStart = (unsigned)__p[0];
+        if (rangeStart > __codePoint) break;
+
+        rangeSize = ((unsigned)__p[1]) >> 8;
+        rangeEnd = rangeStart + rangeSize;
+        if (__codePoint <= rangeEnd) {
+            mod = __p[1] & 0xFF;
+            if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
+                OBJ newChar;
+                unsigned newCodePoint;
+
+                newCodePoint = __codePoint + __p[2];
+                if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
+                    RETURN (__MKCHARACTER(newCodePoint)) ;
+                }
+                newChar = __MKUCHARACTER(newCodePoint) ;
+                if (newChar == nil) goto allocationError;
+                RETURN (newChar) ;
+            }
+        }
     }
     RETURN (self);
 allocationError: ;
@@ -1457,7 +1499,7 @@
     ^ s contents
 
     "
-	'ä' utf8Encoded
+	'ä' utf8Encoded
     "
 ! !
 
@@ -2523,9 +2565,9 @@
 
     "
      $e asNonDiacritical
-     $é asNonDiacritical
-     $ä asNonDiacritical
-     $å asNonDiacritical
+     $é asNonDiacritical
+     $ä asNonDiacritical
+     $Ã¥ asNonDiacritical
     "
 !
 
@@ -3060,3 +3102,4 @@
 version_CVS
     ^ '$Header$'
 ! !
+