Character.st
changeset 18691 0b9e9bc60d61
parent 18658 d1665870d020
child 18692 442b51ab0e41
child 18805 7e090beaf16a
equal deleted inserted replaced
18690:f0a013daec58 18691:0b9e9bc60d61
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 1988 by Claus Gittinger
     4  COPYRIGHT (c) 1988 by Claus Gittinger
     3 	      All Rights Reserved
     5 	      All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
   509 fromUser
   511 fromUser
   510     "return a character from the keyboard (C's standard input stream)
   512     "return a character from the keyboard (C's standard input stream)
   511      - this should only be used for emergency evaluators and the like."
   513      - this should only be used for emergency evaluators and the like."
   512 
   514 
   513 %{  /* NOCONTEXT */
   515 %{  /* NOCONTEXT */
       
   516 #   include <errno.h>
       
   517 
   514     int c;
   518     int c;
   515 
   519 
   516     c = getchar();
   520     for (;;) {
   517     if (c < 0) {
   521         c = getchar();
   518 	RETURN (nil);
   522         if (c >= 0) break;
       
   523 	if (errno != EINTR) {	
       
   524 	    RETURN (nil);
       
   525 	}
   519     }
   526     }
   520     RETURN ( __MKCHARACTER(c & 0xFF) );
   527     RETURN ( __MKCHARACTER(c & 0xFF) );
   521 %}.
   528 %}.
   522     ^ Stdin next
   529     ^ Stdin next
   523 ! !
   530 ! !
   592     "
   599     "
   593      Character separators
   600      Character separators
   594     "
   601     "
   595 ! !
   602 ! !
   596 
   603 
   597 
       
   598 !Character methodsFor:'Compatibility-Dolphin'!
   604 !Character methodsFor:'Compatibility-Dolphin'!
   599 
   605 
   600 isAlphaNumeric
   606 isAlphaNumeric
   601     "Compatibility method - do not use in new code.
   607     "Compatibility method - do not use in new code.
   602      Return true, if I am a letter or a digit
   608      Return true, if I am a letter or a digit
   836 
   842 
   837 asLowercase
   843 asLowercase
   838     "return a character with same letter as the receiver, but in lowercase.
   844     "return a character with same letter as the receiver, but in lowercase.
   839      Returns the receiver if it is already lowercase or if there is no lowercase equivalent.
   845      Returns the receiver if it is already lowercase or if there is no lowercase equivalent.
   840      CAVEAT:
   846      CAVEAT:
   841 	for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
   847         for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
   842 	(which is more than mozilla does, btw. ;-)"
   848         (which is more than mozilla does, btw. ;-)"
   843 
   849 
   844 %{
   850 %{
   845 #ifdef __SCHTEAM__
   851 #ifdef __SCHTEAM__
   846     {
   852     {
   847 	char ch = self.charValue("[asLowercase]");
   853         char ch = self.charValue("[asLowercase]");
   848 
   854 
   849 	ch = java.lang.Character.toLowerCase(ch);
   855         ch = java.lang.Character.toLowerCase(ch);
   850 	return context._RETURN(STCharacter._new(ch));
   856         return context._RETURN(STCharacter._new(ch));
   851     }
   857     }
   852     /* NOTREACHED */
   858     /* NOTREACHED */
   853 #else
   859 #else
   854     static int __mapping[] = {
   860     static int __mapping[] = {
   855     /* From    To             Every   Diff   */
   861     /* From    To             Every   Diff   */
   983 
   989 
   984     REGISTER unsigned INT __codePoint;
   990     REGISTER unsigned INT __codePoint;
   985     REGISTER int *  __p;
   991     REGISTER int *  __p;
   986 
   992 
   987     __codePoint = __intVal(__INST(asciivalue));
   993     __codePoint = __intVal(__INST(asciivalue));
       
   994 
       
   995     // common ascii stuff first
       
   996     if (__codePoint < 0x80) {
       
   997         if ((__codePoint >= 'A') && (__codePoint <= 'Z')) {
       
   998             unsigned newCodePoint;
       
   999 
       
  1000             newCodePoint = __codePoint - 'A' + 'a';
       
  1001             RETURN (__MKCHARACTER(newCodePoint)) ;
       
  1002         }
       
  1003         RETURN (self);
       
  1004     }
       
  1005 
   988     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
  1006     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
   989 	unsigned rangeStart, rangeSize, rangeEnd, mod;
  1007         unsigned rangeStart, rangeSize, rangeEnd, mod;
   990 
  1008 
   991 	rangeStart = (unsigned)__p[0];
  1009         rangeStart = (unsigned)__p[0];
   992 	if (__codePoint < rangeStart) break;
  1010         if (__codePoint < rangeStart) break;
   993 
  1011 
   994 	rangeSize = ((unsigned)__p[1]) >> 8;
  1012         rangeSize = ((unsigned)__p[1]) >> 8;
   995 	rangeEnd = rangeStart + rangeSize;
  1013         rangeEnd = rangeStart + rangeSize;
   996 	if (__codePoint <= rangeEnd) {
  1014         if (__codePoint <= rangeEnd) {
   997 	    mod = __p[1] & 0xFF;
  1015             mod = __p[1] & 0xFF;
   998 	    if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
  1016             if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
   999 		OBJ newChar;
  1017                 OBJ newChar;
  1000 		unsigned newCodePoint;
  1018                 unsigned newCodePoint;
  1001 
  1019 
  1002 		newCodePoint = __codePoint + __p[2];
  1020                 newCodePoint = __codePoint + __p[2];
  1003 		if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1021                 if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1004 		    RETURN (__MKCHARACTER(newCodePoint)) ;
  1022                     RETURN (__MKCHARACTER(newCodePoint)) ;
  1005 		}
  1023                 }
  1006 		newChar = __MKUCHARACTER(newCodePoint) ;
  1024                 newChar = __MKUCHARACTER(newCodePoint) ;
  1007 		if (newChar == nil) goto allocationError;
  1025                 if (newChar == nil) goto allocationError;
  1008 		RETURN (newChar) ;
  1026                 RETURN (newChar) ;
  1009 	    }
  1027             }
  1010 	}
  1028         }
  1011     }
  1029     }
  1012     RETURN (self);
  1030     RETURN (self);
  1013 allocationError: ;
  1031 allocationError: ;
  1014 #endif /* ! __SCHTEAM__ */
  1032 #endif /* ! __SCHTEAM__ */
  1015 %}.
  1033 %}.
  1104 
  1122 
  1105     REGISTER unsigned INT __codePoint;
  1123     REGISTER unsigned INT __codePoint;
  1106     REGISTER unsigned short *__p;
  1124     REGISTER unsigned short *__p;
  1107 
  1125 
  1108     __codePoint = __intVal(__INST(asciivalue));
  1126     __codePoint = __intVal(__INST(asciivalue));
  1109     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
  1127     if ((__codePoint > 0x01C0) && (__codePoint < 0x01FF)) {
  1110 	if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
  1128         for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 2) {
  1111 	    short newCodePoint;
  1129             if ((__codePoint == __p[0]) || (__codePoint == __p[1])) {
  1112 	    OBJ newChar;
  1130                 short newCodePoint;
  1113 
  1131                 OBJ newChar;
  1114 	    newCodePoint = __p[1];
  1132 
  1115 	    if (newCodePoint == __codePoint) {
  1133                 newCodePoint = __p[1];
  1116 		RETURN (self);
  1134                 if (newCodePoint == __codePoint) {
  1117 	    }
  1135                     RETURN (self);
  1118 	    if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1136                 }
  1119 		RETURN (__MKCHARACTER(newCodePoint)) ;
  1137                 if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1120 	    }
  1138                     RETURN (__MKCHARACTER(newCodePoint)) ;
  1121 	    newChar = __MKUCHARACTER(newCodePoint) ;
  1139                 }
  1122 	    if (newChar == nil) goto getOutOfHere;
  1140                 newChar = __MKUCHARACTER(newCodePoint) ;
  1123 	    RETURN (newChar) ;
  1141                 if (newChar == nil) goto getOutOfHere;
  1124 	}
  1142                 RETURN (newChar) ;
  1125     }
  1143             }
       
  1144         }
       
  1145     }
       
  1146     if (__codePoint < 0x80) {
       
  1147         // do it here for common ascii characters
       
  1148         if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
       
  1149             unsigned char newCodePoint = __codePoint - 'a' + 'A';
       
  1150             RETURN (__MKCHARACTER(newCodePoint)) ;
       
  1151         }
       
  1152         RETURN (self) ;
       
  1153     }
       
  1154         
  1126     ch = self;
  1155     ch = self;
  1127 getOutOfHere: ;
  1156 getOutOfHere: ;
  1128 %}.
  1157 %}.
  1129     ch notNil ifTrue:[
  1158     ch notNil ifTrue:[
  1130 	^ ch asUppercase.
  1159         ^ ch asUppercase.
  1131     ].
  1160     ].
  1132 
  1161 
  1133     ^ ObjectMemory allocationFailureSignal raise.
  1162     ^ ObjectMemory allocationFailureSignal raise.
  1134 
  1163 
  1135     "
  1164     "
  1136      $A asTitlecase
  1165      $A asTitlecase
  1137      $a asTitlecase
  1166      $a asTitlecase
  1138      (Character value:16r01F1) asTitlecase
  1167      (Character value:16r01F1) asTitlecase
       
  1168      (Character value:16r01F2) asTitlecase
  1139     "
  1169     "
  1140 !
  1170 !
  1141 
  1171 
  1142 asUnicodeString
  1172 asUnicodeString
  1143     "return a unicode string of len 1 with myself as contents.
  1173     "return a unicode string of len 1 with myself as contents.
  1151 
  1181 
  1152 asUppercase
  1182 asUppercase
  1153     "return a character with same letter as the receiver, but in uppercase.
  1183     "return a character with same letter as the receiver, but in uppercase.
  1154      Returns the receiver if it is already uppercase or if there is no uppercase equivalent.
  1184      Returns the receiver if it is already uppercase or if there is no uppercase equivalent.
  1155      CAVEAT:
  1185      CAVEAT:
  1156 	for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
  1186         for now, this method is only correct for unicode characters up to u+1d6ff (Unicode3.1).
  1157 	(which is more than mozilla does, btw. ;-)"
  1187         (which is more than mozilla does, btw. ;-)"
  1158 
  1188 
  1159 %{
  1189 %{
  1160 #ifdef __SCHTEAM__
  1190 #ifdef __SCHTEAM__
  1161     {
  1191     {
  1162 	char ch = self.charValue("[asUppercase]");
  1192         char ch = self.charValue("[asUppercase]");
  1163 
  1193 
  1164 	ch = java.lang.Character.toUpperCase(ch);
  1194         ch = java.lang.Character.toUpperCase(ch);
  1165 	return context._RETURN(STCharacter._new(ch));
  1195         return context._RETURN(STCharacter._new(ch));
  1166     }
  1196     }
  1167     /* NOTREACHED */
  1197     /* NOTREACHED */
  1168 #else
  1198 #else
  1169     static int __mapping[] = {
  1199     static int __mapping[] = {
  1170     /* From    To             Every   Diff   */
  1200     /* From    To             Every   Diff   */
  1310 
  1340 
  1311     REGISTER unsigned INT __codePoint;
  1341     REGISTER unsigned INT __codePoint;
  1312     REGISTER int *__p;
  1342     REGISTER int *__p;
  1313 
  1343 
  1314     __codePoint = __intVal(__INST(asciivalue));
  1344     __codePoint = __intVal(__INST(asciivalue));
       
  1345  
       
  1346    // common ascii stuff first
       
  1347     if (__codePoint < 0x80) {
       
  1348         if ((__codePoint >= 'a') && (__codePoint <= 'z')) {
       
  1349             unsigned newCodePoint;
       
  1350 
       
  1351             newCodePoint = __codePoint - 'a' + 'A';
       
  1352             RETURN (__MKCHARACTER(newCodePoint)) ;
       
  1353         }
       
  1354         RETURN (self);
       
  1355     }
       
  1356 
  1315     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
  1357     for (__p = __mapping; (char *)__p < ((char *)__mapping) + sizeof(__mapping); __p += 3) {
  1316 	unsigned rangeStart, rangeSize, rangeEnd, mod;
  1358         unsigned rangeStart, rangeSize, rangeEnd, mod;
  1317 
  1359 
  1318 	rangeStart = (unsigned)__p[0];
  1360         rangeStart = (unsigned)__p[0];
  1319 	if (rangeStart > __codePoint) break;
  1361         if (rangeStart > __codePoint) break;
  1320 
  1362 
  1321 	rangeSize = ((unsigned)__p[1]) >> 8;
  1363         rangeSize = ((unsigned)__p[1]) >> 8;
  1322 	rangeEnd = rangeStart + rangeSize;
  1364         rangeEnd = rangeStart + rangeSize;
  1323 	if (__codePoint <= rangeEnd) {
  1365         if (__codePoint <= rangeEnd) {
  1324 	    mod = __p[1] & 0xFF;
  1366             mod = __p[1] & 0xFF;
  1325 	    if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
  1367             if ((mod == 0) || (((__codePoint - rangeStart) % mod) == 0)) {
  1326 		OBJ newChar;
  1368                 OBJ newChar;
  1327 		unsigned newCodePoint;
  1369                 unsigned newCodePoint;
  1328 
  1370 
  1329 		newCodePoint = __codePoint + __p[2];
  1371                 newCodePoint = __codePoint + __p[2];
  1330 		if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1372                 if (newCodePoint <= MAX_IMMEDIATE_CHARACTER) {
  1331 		    RETURN (__MKCHARACTER(newCodePoint)) ;
  1373                     RETURN (__MKCHARACTER(newCodePoint)) ;
  1332 		}
  1374                 }
  1333 		newChar = __MKUCHARACTER(newCodePoint) ;
  1375                 newChar = __MKUCHARACTER(newCodePoint) ;
  1334 		if (newChar == nil) goto allocationError;
  1376                 if (newChar == nil) goto allocationError;
  1335 		RETURN (newChar) ;
  1377                 RETURN (newChar) ;
  1336 	    }
  1378             }
  1337 	}
  1379         }
  1338     }
  1380     }
  1339     RETURN (self);
  1381     RETURN (self);
  1340 allocationError: ;
  1382 allocationError: ;
  1341 #endif /* ! __SCHTEAM__ */
  1383 #endif /* ! __SCHTEAM__ */
  1342 %}.
  1384 %}.
  1455     s := WriteStream on:(String new:6).
  1497     s := WriteStream on:(String new:6).
  1456     s nextPutUtf8:self.
  1498     s nextPutUtf8:self.
  1457     ^ s contents
  1499     ^ s contents
  1458 
  1500 
  1459     "
  1501     "
  1460 	'ä' utf8Encoded
  1502 	'ä' utf8Encoded
  1461     "
  1503     "
  1462 ! !
  1504 ! !
  1463 
  1505 
  1464 !Character methodsFor:'copying'!
  1506 !Character methodsFor:'copying'!
  1465 
  1507 
  2521     RETURN (__MKUCHARACTER(val)) ;
  2563     RETURN (__MKUCHARACTER(val)) ;
  2522 %}
  2564 %}
  2523 
  2565 
  2524     "
  2566     "
  2525      $e asNonDiacritical
  2567      $e asNonDiacritical
  2526      $é asNonDiacritical
  2568      $é asNonDiacritical
  2527      $ä asNonDiacritical
  2569      $ä asNonDiacritical
  2528      $å asNonDiacritical
  2570      $å asNonDiacritical
  2529     "
  2571     "
  2530 !
  2572 !
  2531 
  2573 
  2532 isNationalAlphaNumeric
  2574 isNationalAlphaNumeric
  2533     "return true, if the receiver is a letter or digit.
  2575     "return true, if the receiver is a letter or digit.
  3058 !
  3100 !
  3059 
  3101 
  3060 version_CVS
  3102 version_CVS
  3061     ^ '$Header$'
  3103     ^ '$Header$'
  3062 ! !
  3104 ! !
       
  3105