614 |
614 |
615 CharacterEncoder::JIS0208 flushCode; generateCode. |
615 CharacterEncoder::JIS0208 flushCode; generateCode. |
616 " |
616 " |
617 ! ! |
617 ! ! |
618 |
618 |
|
619 !CharacterEncoder class methodsFor:'instance creation'! |
|
620 |
|
621 encoderFor:encodingNameSymbol |

622 "answer an encoder instance for the encoding named by encodingNameSymbol, which can map from/into unicode. Delegates to #encoderFor:ifAbsent: - if no encoder is found, a proceedable error is raised and, when proceeded, a NullEncoder is answered." |

623 |

624 ^ self |

625 encoderFor:encodingNameSymbol |

626 ifAbsent:[ |

627 self error:'no encoder for ' , encodingNameSymbol mayProceed:true. |

628 NullEncoder new |

629 ] |

630 |

631 " |

632 CharacterEncoder encoderFor:#'latin1' |

633 self encoderFor:#'arabic' |

634 self encoderFor:#'ms-arabic' |

635 self encoderFor:#'iso8859-5' |

636 self encoderFor:#'koi8-r' |

637 self encoderFor:#'koi8-u' |

638 self encoderFor:#'jis0208' |

639 self encoderFor:#'jis7' |

640 " |

641 ! |
|
642 |
|
643 encoderFor:encodingNameSymbol ifAbsent:exceptionValue |

644 "answer an encoder instance for the encoding named by encodingNameSymbol, which can map from/into unicode; answer the value of exceptionValue if none is found. Lookup order: the EncodersByName cache; subclasses decoding to unicode, matched by primary name; the same subclasses matched by alternative name; finally any subclass with a matching name, wrapped in a TwoStepEncoder together with the encoder for its decoded code. Only the direct (unicode) hits are stored in the cache." |

645 |

646 |cachedClass lcName name| |

647 |

648 lcName := encodingNameSymbol asLowercase asSymbolIfInterned. |

649 name := lcName ? encodingNameSymbol. |

650 |

651 cachedClass := EncodersByName at:name ifAbsent:nil. |

652 cachedClass notNil ifTrue:[^ cachedClass new ]. |

653 |

654 self allSubclassesDo:[:cls | |

655 cls nameOfDecodedCode == #unicode ifTrue:[ |

656 cls nameOfEncoding = name ifTrue:[ |

657 EncodersByName at:name put:cls. |

658 ^ cls new. |

659 ] |

660 ]. |

661 ]. |

662 self allSubclassesDo:[:cls | |

663 cls nameOfDecodedCode == #unicode ifTrue:[ |

664 (cls alternativeNamesOfEncoding includes:name) ifTrue:[ |

665 EncodersByName at:name put:cls. |

666 ^ cls new. |

667 ]. |

668 ]. |

669 ]. |

670 |

671 self allSubclassesDo:[:cls | |

672 ((cls nameOfEncoding = name) |

673 or:[(cls alternativeNamesOfEncoding includes:name)]) ifTrue:[ |

674 "/ ok, found some other encoder - need a compound encoder then. |

675 "/ the one found encodes into what we need, but needs something else as input. |

676 |

677 ^ TwoStepEncoder new |

678 encoder1:(self encoderFor:(cls nameOfDecodedCode)) |

679 encoder2:(cls new). |

680 ]. |

681 ]. |

682 ^ exceptionValue value |

683 |

684 " |

685 CharacterEncoder encoderFor:#'latin1' |

686 self encoderFor:#'arabic' |

687 self encoderFor:#'ms-arabic' |

688 self encoderFor:#'iso8859-5' |

689 self encoderFor:#'koi8-r' |

690 self encoderFor:#'koi8-u' |

691 self encoderFor:#'jis0208' |

692 self encoderFor:#'jis7' |

693 " |

694 ! |
|
695 |
|
696 encoderToEncodeFrom:oldEncodingArg into:newEncodingArg |

697 |oldEncoding newEncoding encoder decoder| |

698 "answer an encoder which converts from the encoding oldEncodingArg into the encoding newEncodingArg; a nil argument stands for unicode. Answers a NullEncoder if no conversion is needed, a single (possibly inverse) encoder if one side is unicode or a subclass maps directly between the two, and otherwise a CompoundEncoder going via unicode. Both names are converted to symbols first, because the identity comparisons below never match when a caller passes a String such as 'unicode'." |

699 oldEncoding := (oldEncodingArg ? #'unicode') asSymbol. |

700 newEncoding := (newEncodingArg ? #'unicode') asSymbol. |

701 oldEncoding == newEncoding ifTrue:[^ NullEncoder new]. |

702 (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new]. |

703 "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new]. |

704 |

705 ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[ |

706 ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new]. |

707 |

708 "/ unicode -> something |

709 ^ self encoderFor:newEncoding. |

710 ]. |

711 ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[ |

712 "/ something -> unicode |

713 decoder := self encoderFor:oldEncoding. |

714 ^ InverseEncoder new decoder:decoder. |

715 ]. |

716 |

717 "/ look for a specialized encoder... |

718 self allSubclassesDo:[:cls | |

719 (cls nameOfEncoding = oldEncoding |

720 or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ |

721 |nameOfDecodedCode encoderForDecodedCode| |

722 |

723 nameOfDecodedCode := cls nameOfDecodedCode. |

724 encoderForDecodedCode := self encoderFor:nameOfDecodedCode. |

725 (nameOfDecodedCode = newEncoding |

726 or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ |

727 ^ InverseEncoder new decoder:cls new. |

728 ] |

729 ]. |

730 (cls nameOfEncoding = newEncoding |

731 or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ |

732 |nameOfDecodedCode encoderForDecodedCode| |

733 |

734 nameOfDecodedCode := cls nameOfDecodedCode. |

735 encoderForDecodedCode := self encoderFor:nameOfDecodedCode. |

736 (nameOfDecodedCode = oldEncoding |

737 or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ |

738 ^ cls new. |

739 ] |

740 ]. |

741 ]. |

742 |

743 "/ do it as: oldEncoding -> unicode -> newEncoding |

744 |

745 "/ something -> unicode |

746 decoder := self encoderFor:oldEncoding. |

747 |

748 "/ unicode -> something |

749 encoder := self encoderFor:newEncoding. |

750 ^ CompoundEncoder new encoder:encoder decoder:decoder |

751 ! |
|
752 |
|
753 unicodeEncoderFor:encodingNameSymbol |

754 "obsolete - use #encoderFor: instead. Issues an obsolete-method warning and then answers whatever #encoderFor: answers for encodingNameSymbol (an encoder-instance which can map from/into unicode)." |

755 |

756 self obsoleteMethodWarning. |

757 ^ self encoderFor:encodingNameSymbol |

758 |

759 " |

760 CharacterEncoder unicodeEncoderFor:#'latin1' |

761 self unicodeEncoderFor:#'arabic' |

762 self unicodeEncoderFor:#'ms-arabic' |

763 self unicodeEncoderFor:#'iso8859-5' |

764 self unicodeEncoderFor:#'koi8-r' |

765 self unicodeEncoderFor:#'koi8-u' |

766 self unicodeEncoderFor:#'jis0208' |

767 self unicodeEncoderFor:#'jis7' |

768 " |

769 ! |
|
770 |
|
771 unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue |

772 "obsolete - use #encoderFor:ifAbsent: instead. Issues an obsolete-method warning and then answers whatever #encoderFor:ifAbsent: answers for encodingNameSymbol and exceptionValue (an encoder-instance which can map from/into unicode, or the value of exceptionValue)." |

773 |

774 self obsoleteMethodWarning. |

775 ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue |

776 |

777 " |

778 CharacterEncoder unicodeEncoderFor:#'latin1' |

779 self unicodeEncoderFor:#'arabic' |

780 self unicodeEncoderFor:#'ms-arabic' |

781 self unicodeEncoderFor:#'iso8859-5' |

782 self unicodeEncoderFor:#'koi8-r' |

783 self unicodeEncoderFor:#'koi8-u' |

784 self unicodeEncoderFor:#'jis0208' |

785 self unicodeEncoderFor:#'jis7' |

786 " |

787 ! ! |
|
788 |
619 !CharacterEncoder class methodsFor:'Compatibility-ST80'! |
789 !CharacterEncoder class methodsFor:'Compatibility-ST80'! |
620 |
790 |
621 encoderNamed: encoderName |
791 encoderNamed: encoderName |
622 "/ q & d hack |
792 "/ q & d hack |
623 |
793 |
723 ^ (self encoderToEncodeFrom:oldEncoding into:newEncoding) encodeString:aString. |
893 ^ (self encoderToEncodeFrom:oldEncoding into:newEncoding) encodeString:aString. |
724 ! |
894 ! |
725 |
895 |
726 encodeString:aString into:newEncodingArg "encode aString, taken to be unicode, into newEncodingArg. The source encoding is passed as the symbol #unicode, because the encoder lookup compares encoding names by identity" |
896 encodeString:aString into:newEncodingArg "encode aString, taken to be unicode, into newEncodingArg. The source encoding is passed as the symbol #unicode, because the encoder lookup compares encoding names by identity" |
727 ^ self encodeString:aString from:#'unicode' into:newEncodingArg |
897 ^ self encodeString:aString from:#'unicode' into:newEncodingArg |
728 ! |
|
729 |
|
730 encoderToEncodeFrom:oldEncodingArg into:newEncodingArg |

731 |oldEncoding newEncoding encoder decoder| |

732 "answer an encoder which converts from the encoding oldEncodingArg into the encoding newEncodingArg; a nil argument stands for unicode. Answers a NullEncoder if no conversion is needed, a single (possibly inverse) encoder if one side is unicode or a subclass maps directly between the two, and otherwise a CompoundEncoder going via unicode. Both names are converted to symbols first, because the identity comparisons below never match when a caller passes a String such as 'unicode'." |

733 oldEncoding := (oldEncodingArg ? #'unicode') asSymbol. |

734 newEncoding := (newEncodingArg ? #'unicode') asSymbol. |

735 oldEncoding == newEncoding ifTrue:[^ NullEncoder new]. |

736 (oldEncoding match:newEncoding) ifTrue:[^ NullEncoder new]. |

737 "/ (newEncoding match:oldEncoding) ifTrue:[^ NullEncoder new]. |

738 |

739 ((oldEncoding == #unicode) or:[(oldEncoding == #'iso10646-1')]) ifTrue:[ |

740 ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[^ NullEncoder new]. |

741 |

742 "/ unicode -> something |

743 ^ self encoderFor:newEncoding. |

744 ]. |

745 ((newEncoding == #unicode) or:[(newEncoding == #'iso10646-1')]) ifTrue:[ |

746 "/ something -> unicode |

747 decoder := self encoderFor:oldEncoding. |

748 ^ InverseEncoder new decoder:decoder. |

749 ]. |

750 |

751 "/ look for a specialized encoder... |

752 self allSubclassesDo:[:cls | |

753 (cls nameOfEncoding = oldEncoding |

754 or:[ cls alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ |

755 |nameOfDecodedCode encoderForDecodedCode| |

756 |

757 nameOfDecodedCode := cls nameOfDecodedCode. |

758 encoderForDecodedCode := self encoderFor:nameOfDecodedCode. |

759 (nameOfDecodedCode = newEncoding |

760 or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ |

761 ^ InverseEncoder new decoder:cls new. |

762 ] |

763 ]. |

764 (cls nameOfEncoding = newEncoding |

765 or:[ cls alternativeNamesOfEncoding includes:newEncoding ]) ifTrue:[ |

766 |nameOfDecodedCode encoderForDecodedCode| |

767 |

768 nameOfDecodedCode := cls nameOfDecodedCode. |

769 encoderForDecodedCode := self encoderFor:nameOfDecodedCode. |

770 (nameOfDecodedCode = oldEncoding |

771 or:[ encoderForDecodedCode class alternativeNamesOfEncoding includes:oldEncoding ]) ifTrue:[ |

772 ^ cls new. |

773 ] |

774 ]. |

775 ]. |

776 |

777 "/ do it as: oldEncoding -> unicode -> newEncoding |

778 |

779 "/ something -> unicode |

780 decoder := self encoderFor:oldEncoding. |

781 |

782 "/ unicode -> something |

783 encoder := self encoderFor:newEncoding. |

784 ^ CompoundEncoder new encoder:encoder decoder:decoder |

785 ! ! |
898 ! ! |
786 |
899 |
787 !CharacterEncoder class methodsFor:'private'! |
900 !CharacterEncoder class methodsFor:'private'! |
788 |
901 |
789 flushCode |
902 flushCode |
872 ^ #() |
985 ^ #() |
873 ! |
986 ! |
874 |
987 |
875 alternativeNamesOfEncoding "answer the collection of additional names under which this encoding is looked up; this implementation answers an empty array" |
988 alternativeNamesOfEncoding "answer the collection of additional names under which this encoding is looked up; this implementation answers an empty array" |
876 ^ #() |
989 ^ #() |
877 ! |
|
878 |
|
879 encoderFor:encodingNameSymbol |

880 "answer an encoder instance for the encoding named by encodingNameSymbol, which can map from/into unicode. Delegates to #encoderFor:ifAbsent: - if no encoder is found, a proceedable error is raised and, when proceeded, a NullEncoder is answered." |

881 |

882 ^ self |

883 encoderFor:encodingNameSymbol |

884 ifAbsent:[ |

885 self error:'no encoder for ' , encodingNameSymbol mayProceed:true. |

886 NullEncoder new |

887 ] |

888 |

889 " |

890 CharacterEncoder encoderFor:#'latin1' |

891 self encoderFor:#'arabic' |

892 self encoderFor:#'ms-arabic' |

893 self encoderFor:#'iso8859-5' |

894 self encoderFor:#'koi8-r' |

895 self encoderFor:#'koi8-u' |

896 self encoderFor:#'jis0208' |

897 self encoderFor:#'jis7' |

898 " |

899 ! |
|
900 |
|
901 encoderFor:encodingNameSymbol ifAbsent:exceptionValue |

902 "answer an encoder instance for the encoding named by encodingNameSymbol, which can map from/into unicode; answer the value of exceptionValue if none is found. Lookup order: the EncodersByName cache; subclasses decoding to unicode, matched by primary name; the same subclasses matched by alternative name; finally any subclass with a matching name, wrapped in a TwoStepEncoder together with the encoder for its decoded code. Only the direct (unicode) hits are stored in the cache." |

903 |

904 |cachedClass lcName name| |

905 |

906 lcName := encodingNameSymbol asLowercase asSymbolIfInterned. |

907 name := lcName ? encodingNameSymbol. |

908 |

909 cachedClass := EncodersByName at:name ifAbsent:nil. |

910 cachedClass notNil ifTrue:[^ cachedClass new ]. |

911 |

912 self allSubclassesDo:[:cls | |

913 cls nameOfDecodedCode == #unicode ifTrue:[ |

914 cls nameOfEncoding = name ifTrue:[ |

915 EncodersByName at:name put:cls. |

916 ^ cls new. |

917 ] |

918 ]. |

919 ]. |

920 self allSubclassesDo:[:cls | |

921 cls nameOfDecodedCode == #unicode ifTrue:[ |

922 (cls alternativeNamesOfEncoding includes:name) ifTrue:[ |

923 EncodersByName at:name put:cls. |

924 ^ cls new. |

925 ]. |

926 ]. |

927 ]. |

928 |

929 self allSubclassesDo:[:cls | |

930 ((cls nameOfEncoding = name) |

931 or:[(cls alternativeNamesOfEncoding includes:name)]) ifTrue:[ |

932 "/ ok, found some other encoder - need a compound encoder then. |

933 "/ the one found encodes into what we need, but needs something else as input. |

934 |

935 ^ TwoStepEncoder new |

936 encoder1:(self encoderFor:(cls nameOfDecodedCode)) |

937 encoder2:(cls new). |

938 ]. |

939 ]. |

940 ^ exceptionValue value |

941 |

942 " |

943 CharacterEncoder encoderFor:#'latin1' |

944 self encoderFor:#'arabic' |

945 self encoderFor:#'ms-arabic' |

946 self encoderFor:#'iso8859-5' |

947 self encoderFor:#'koi8-r' |

948 self encoderFor:#'koi8-u' |

949 self encoderFor:#'jis0208' |

950 self encoderFor:#'jis7' |

951 " |

952 ! |
990 ! |
953 |
991 |
954 isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg |
992 isEncoding:subSetEncodingArg subSetOf:superSetEncodingArg |
955 "return true, if superSetEncoding encoding includes all characters of subSetEncoding" |
993 "return true, if superSetEncoding encoding includes all characters of subSetEncoding" |
956 |
994 |
1033 nil |
1071 nil |
1034 ('gb' 'GB - mainland chin' ) |
1072 ('gb' 'GB - mainland chin' ) |
1035 ('big5' 'BIG5 - taiwan' ) |
1073 ('big5' 'BIG5 - taiwan' ) |
1036 "/ ('ksc' 'korean' ) |
1074 "/ ('ksc' 'korean' ) |
1037 ) |
1075 ) |
1038 ! |
|
1039 |
|
1040 unicodeEncoderFor:encodingNameSymbol |

1041 "obsolete - use #encoderFor: instead. Issues an obsolete-method warning and then answers whatever #encoderFor: answers for encodingNameSymbol (an encoder-instance which can map from/into unicode)." |

1042 |

1043 self obsoleteMethodWarning. |

1044 ^ self encoderFor:encodingNameSymbol |

1045 |

1046 " |

1047 CharacterEncoder unicodeEncoderFor:#'latin1' |

1048 self unicodeEncoderFor:#'arabic' |

1049 self unicodeEncoderFor:#'ms-arabic' |

1050 self unicodeEncoderFor:#'iso8859-5' |

1051 self unicodeEncoderFor:#'koi8-r' |

1052 self unicodeEncoderFor:#'koi8-u' |

1053 self unicodeEncoderFor:#'jis0208' |

1054 self unicodeEncoderFor:#'jis7' |

1055 " |

1056 ! |
|
1057 |
|
1058 unicodeEncoderFor:encodingNameSymbol ifAbsent:exceptionValue |

1059 "obsolete - use #encoderFor:ifAbsent: instead. Issues an obsolete-method warning and then answers whatever #encoderFor:ifAbsent: answers for encodingNameSymbol and exceptionValue (an encoder-instance which can map from/into unicode, or the value of exceptionValue)." |

1060 |

1061 self obsoleteMethodWarning. |

1062 ^ self encoderFor:encodingNameSymbol ifAbsent:exceptionValue |

1063 |

1064 " |

1065 CharacterEncoder unicodeEncoderFor:#'latin1' |

1066 self unicodeEncoderFor:#'arabic' |

1067 self unicodeEncoderFor:#'ms-arabic' |

1068 self unicodeEncoderFor:#'iso8859-5' |

1069 self unicodeEncoderFor:#'koi8-r' |

1070 self unicodeEncoderFor:#'koi8-u' |

1071 self unicodeEncoderFor:#'jis0208' |

1072 self unicodeEncoderFor:#'jis7' |

1073 " |

1074 ! |
1076 ! |
1075 |
1077 |
1076 userFriendlyNameOfEncoding "answer the user-friendly name of this encoding (presumably meant for display - confirm with callers); this implementation simply answers nameOfEncoding" |
1078 userFriendlyNameOfEncoding "answer the user-friendly name of this encoding (presumably meant for display - confirm with callers); this implementation simply answers nameOfEncoding" |
1077 ^ self nameOfEncoding |
1079 ^ self nameOfEncoding |
1078 ! ! |
1080 ! ! |