Character.st
changeset 23950 308a94fcf0cd
parent 23852 d2f4756deae3
child 24143 c6ea584e6331
equal deleted inserted replaced
23949:320a57c5dad1 23950:308a94fcf0cd
       
     1 "{ Encoding: utf8 }"
       
     2 
     1 "
     3 "
     2  COPYRIGHT (c) 1988 by Claus Gittinger
     4  COPYRIGHT (c) 1988 by Claus Gittinger
     3 	      All Rights Reserved
     5 	      All Rights Reserved
     4 
     6 
     5  This software is furnished under a license and may be used
     7  This software is furnished under a license and may be used
   697       or:[ (asciivalue == 215 )
   699       or:[ (asciivalue == 215 )
   698       or:[ (asciivalue == 247 ) ]]]]]
   700       or:[ (asciivalue == 247 ) ]]]]]
   699 ! !
   701 ! !
   700 
   702 
   701 
   703 
   702 
       
   703 !Character methodsFor:'accessing'!
   704 !Character methodsFor:'accessing'!
   704 
   705 
   705 codePoint
   706 codePoint
   706     "return the codePoint of myself.
   707     "return the codePoint of myself.
   707      Traditionally, this was named 'asciiValue';
   708      Traditionally, this was named 'asciiValue';
   858     self codePoint == aCharacter codePoint ifTrue:[^ true].
   859     self codePoint == aCharacter codePoint ifTrue:[^ true].
   859     ^ self asLowercase codePoint == aCharacter asLowercase codePoint.
   860     ^ self asLowercase codePoint == aCharacter asLowercase codePoint.
   860 
   861 
   861     "
   862     "
   862       (Character value:345) sameAs:(Character value:345)
   863       (Character value:345) sameAs:(Character value:345)
   863       $Ж sameAs:$ж 
   864       $Ж sameAs:$ж 
   864       $ж sameAs:$Ж 
   865       $ж sameAs:$Ж 
   865     "
   866     "
   866 
   867 
   867     "Modified (comment): / 28-03-2017 / 16:19:48 / stefan"
   868     "Modified (comment): / 28-03-2017 / 16:19:48 / stefan"
   868 !
   869 !
   869 
   870 
  1557     s := WriteStream on:(String new:self utf8BytesPerCharacter).
  1558     s := WriteStream on:(String new:self utf8BytesPerCharacter).
  1558     s nextPutUtf8:self.
  1559     s nextPutUtf8:self.
  1559     ^ s contents
  1560     ^ s contents
  1560 
  1561 
  1561     "
  1562     "
  1562      'ä' utf8Encoded
  1563      'ä' utf8Encoded
  1563      'a' utf8Encoded
  1564      'a' utf8Encoded
  1564     "
  1565     "
  1565 
  1566 
  1566     "Modified: / 07-02-2017 / 14:37:06 / stefan"
  1567     "Modified: / 07-02-2017 / 14:37:06 / stefan"
  1567 !
  1568 !
  1568 
  1569 
  1569 withoutDiacritics
  1570 withoutDiacritics
  1570     <resource: #todo>
  1571     <resource: #todo>
  1571     "return a character with the same letter as the receiver, but without diacritic modifiers
  1572     "return a character with the same letter as the receiver, but without diacritic modifiers
  1572      (mapping e.g. Ä to A).
  1573      (mapping e.g. Ä to A).
  1573      Returns the receiver if it has no diacritics modifiers."
  1574      Returns the receiver if it has no diacritics modifiers."
  1574 
  1575 
  1575     ^ self shouldImplement
  1576     ^ self shouldImplement
  1576 
  1577 
  1577     "Created: / 28-03-2017 / 16:01:45 / stefan"
  1578     "Created: / 28-03-2017 / 16:01:45 / stefan"
  1589     unsigned INT val;
  1590     unsigned INT val;
  1590 
  1591 
  1591     // fast code for common cases
  1592     // fast code for common cases
  1592     val = __intVal(__characterVal(self));
  1593     val = __intVal(__characterVal(self));
  1593     if (val <= 0xFF) {
  1594     if (val <= 0xFF) {
  1594 	if (__isCharacter(aStringOrCharacter)) {
  1595         if (__isCharacter(aStringOrCharacter)) {
  1595 	    unsigned INT val2 = __intVal(__characterVal(aStringOrCharacter));
  1596             unsigned INT val2 = __intVal(__characterVal(aStringOrCharacter));
  1596 
  1597 
  1597 	    if (val2 <= 0xFF) {
  1598             if (val2 <= 0xFF) {
  1598 		char buffer[2];
  1599                 char buffer[2];
  1599 
  1600 
  1600 		buffer[0] = val;
  1601                 buffer[0] = val;
  1601 		buffer[1] = val2;
  1602                 buffer[1] = val2;
  1602 		s = __MKSTRING_L(buffer, 2);
  1603                 s = __MKSTRING_L(buffer, 2);
  1603 		if (s != nil) {
  1604                 if (s != nil) {
  1604 		    RETURN (s);
  1605                     RETURN (s);
  1605 		}
  1606                 }
  1606 	    }
  1607             }
  1607 	} else {
  1608         } else {
  1608 	    if (__isString(aStringOrCharacter)) {
  1609             if (__isStringLike(aStringOrCharacter)) {
  1609 		int strSize = __stringSize(aStringOrCharacter);
  1610                 int strSize = __stringSize(aStringOrCharacter);
  1610 
  1611 
  1611 		s = __MKEMPTYSTRING(strSize+1);
  1612                 s = __MKEMPTYSTRING(strSize+1);
  1612 		if (s != nil) {
  1613                 if (s != nil) {
  1613 		    __StringInstPtr(s)->s_element[0] = val;
  1614                     __StringInstPtr(s)->s_element[0] = val;
  1614 		    memcpy(__StringInstPtr(s)->s_element+1, __stringVal(aStringOrCharacter), strSize+1); // copies 0-byte too
  1615                     memcpy(__StringInstPtr(s)->s_element+1, __stringVal(aStringOrCharacter), strSize+1); // copies 0-byte too
  1615 		    RETURN (s);
  1616                     RETURN (s);
  1616 		}
  1617                 }
  1617 	    }
  1618             }
  1618 	}
  1619         }
  1619     }
  1620     }
  1620 %}.
  1621 %}.
  1621     ^ self asString , aStringOrCharacter
  1622     ^ self asString , aStringOrCharacter
  1622 
  1623 
  1623     "
  1624     "
  1627       Time millisecondsToRun:[ 10000000 timesRepeat:[ $a , $b ]]
  1628       Time millisecondsToRun:[ 10000000 timesRepeat:[ $a , $b ]]
  1628       Time millisecondsToRun:[ 10000000 timesRepeat:[ $a , 'b' ]]
  1629       Time millisecondsToRun:[ 10000000 timesRepeat:[ $a , 'b' ]]
  1629       Time millisecondsToRun:[ 10000000 timesRepeat:[ 'a' , 'b' ]]
  1630       Time millisecondsToRun:[ 10000000 timesRepeat:[ 'a' , 'b' ]]
  1630       Time millisecondsToRun:[ 10000000 timesRepeat:[ 'a' , $b ]]
  1631       Time millisecondsToRun:[ 10000000 timesRepeat:[ 'a' , $b ]]
  1631     "
  1632     "
       
  1633 
       
  1634     "Modified: / 22-03-2019 / 03:00:09 / Claus Gittinger"
  1632 !
  1635 !
  1633 
  1636 
  1634 copy
  1637 copy
  1635     "return a copy of myself
  1638     "return a copy of myself
  1636      reimplemented since characters are unique"
  1639      reimplemented since characters are unique"
  2753     RETURN (__MKUCHARACTER(val)) ;
  2756     RETURN (__MKUCHARACTER(val)) ;
  2754 %}
  2757 %}
  2755 
  2758 
  2756     "
  2759     "
  2757      $e asNonDiacritical
  2760      $e asNonDiacritical
  2758      $é asNonDiacritical
  2761      $é asNonDiacritical
  2759      $ä asNonDiacritical
  2762      $ä asNonDiacritical
  2760      $å asNonDiacritical
  2763      $å asNonDiacritical
  2761     "
  2764     "
  2762 !
  2765 !
  2763 
  2766 
  2764 isNationalAlphaNumeric
  2767 isNationalAlphaNumeric
  2765     "return true, if the receiver is a letter or digit.
  2768     "return true, if the receiver is a letter or digit.