00001 00009 #include "translator.h" 00010 00012 const char Translator::Win1250ToISO88592Tab[] = 00013 { 00014 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', 00015 '\x88', '\x89', '\xA9', '\x8B', '\xA6', '\xAB', '\xAE', '\xAC', 00016 '\x90', '\x91', '\x92', '\x93', '\x94', '\x2E', '\x96', '\x97', 00017 '\x98', '\x99', '\xB9', '\x9B', '\xB6', '\xBB', '\xBE', '\xBC', 00018 '\xA0', '\x20', '\x20', '\xA3', '\xA4', '\xA1', '\xA6', '\xA7', 00019 '\x22', '\xA9', '\xAA', '\x3C', '\xAC', '\x2D', '\xAE', '\xAF', 00020 '\x2E', '\x2B', '\x20', '\xB3', '\x27', '\x75', '\xB6', '\xB7', 00021 '\x20', '\xB1', '\xBA', '\x3E', '\xA5', '\x22', '\xB5', '\xBF', 00022 '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 00023 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF', 00024 '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', 00025 '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF', 00026 '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 00027 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF', 00028 '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\x2D', 00029 '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF', 00030 '\0' 00031 }; 00032 00033 00035 const char Translator::ISO88592ToWin1250Tab[] = { 00036 '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', 00037 '\x88', '\x89', '\x8A', '\x8B', '\x8C', '\x8D', '\x8E', '\x8F', 00038 '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', 00039 '\x98', '\x99', '\x9A', '\x9B', '\x9C', '\x9D', '\x9E', '\x9F', 00040 '\xA0', '\xA5', '\xA2', '\xA3', '\xA4', '\xBC', '\x8C', '\xA7', 00041 '\xA8', '\x8A', '\xAA', '\x8D', '\x8F', '\xAD', '\x8E', '\xAF', 00042 '\xB0', '\xB9', '\xB2', '\xB3', '\xB4', '\xBE', '\x9C', '\xB7', 00043 '\xB8', '\x9A', '\xBA', '\x9D', '\x9F', '\xBD', '\x9E', '\xBF', 00044 '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7', 00045 '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF', 00046 '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7', 00047 '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF', 00048 '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7', 00049 '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF', 00050 '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7', 00051 '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF', 00052 '\0' 00053 }; 00054 00055 00057 const unsigned char Translator::Koi8RToWindows1251Tab[128] = 00058 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 00059 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, 00060 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, 00061 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 00062 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, 00063 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250, 00064 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206, 00065 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218 00066 }; 00067 00068 00070 const unsigned char Translator::Windows1251ToKoi8RTab[128] = 00071 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 00072 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, 00073 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, 00074 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, 00075 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, 00076 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, 00077 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, 00078 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209 00079 }; 00080 00082 /* The method was designed initially for translator_cz.h. 00083 * It is used for on-line encoding conversion related to 00084 * conditional compilation in Unix/MS Windows environments 00085 * (both use different encoding). Later, the translator_hr.h 00086 * (by Boris Bralo) used and improved the same style. As the 00087 * method with the translation table was the same, the 00088 * decision to move it to this base class was made. The same 00089 * holds for ISO88592ToWin1250() method. 00090 * 00091 * Alexandr Chelpanov used the same approach for 00092 * Koi8RToWindows1251() and Windows1251ToKoi8R() methods. Notice, 00093 * that he uses Unicode tables. 00094 * 00095 * It is recommended for possibly other similar methods in future. 00096 */ 00097 QCString Translator::Win1250ToISO88592(const QCString & sInput) 00098 { 00099 // The conversion table for characters >127 00100 // 00101 00102 QCString result; 00103 int len = sInput.length(); 00104 00105 for (int i = 0; i < len; ++i) 00106 { 00107 unsigned int c = sInput[i]; 00108 result += (c > 127) ? Win1250ToISO88592Tab[c & 0x7F] : c; 00109 } 00110 return result; 00111 } 00112 00113 00115 /* See the comments of the Win1250ToISO88592() method for details. */ 00116 QCString Translator::ISO88592ToWin1250(const QCString & sInput) 00117 { 00118 // The conversion table for characters >127 00119 // 00120 QCString result; 00121 int len = sInput.length(); 00122 00123 for (int i = 0; i < len; ++i) 00124 { 00125 unsigned int c = sInput[i]; 00126 result += (c > 127) ? ISO88592ToWin1250Tab[c & 0x7F] : c; 00127 } 00128 return result; 00129 } 00130 00131 00133 /* The method was designed initially for translator_cz.h. 00134 It is used for on-line encoding conversion related to conditional 00135 compilation in Unix/MS Windows environments (both use different 00136 encoding). Encoding table got from QT:qtextcodec.cpp 00137 */ 00138 QCString Translator::Koi8RToWindows1251( const QCString & sInput ) 00139 { 00140 00141 QCString result(sInput); 00142 int len = sInput.length(); 00143 00144 const unsigned char * c = (const unsigned char *)(const char*)sInput; 00145 unsigned char *dc = (unsigned char*)(const char*)result; 00146 for( int i=0; i<len; i++ ) { 00147 if ( c[i] > 127 ) 00148 dc[i] = Koi8RToWindows1251Tab[c[i]-128]; 00149 } 00150 return result; 00151 } 00152 00153 00155 /* See the comments of the Koi8RToWindows1251() method for details. 00156 Encoding table got from QT:qtextcodec.cpp */ 00157 QCString Translator::Windows1251ToKoi8R( const QCString & sInput ) 00158 { 00159 QCString result(sInput); 00160 int len = sInput.length(); 00161 00162 const unsigned char * c = (const unsigned char *)(const char*)sInput; 00163 unsigned char *dc = (unsigned char*)(const char*)result; 00164 for( int i=0; i<len; i++ ) { 00165 if ( c[i] > 127 ) 00166 dc[i] = Windows1251ToKoi8RTab[c[i]-128]; 00167 } 00168 return result; 00169 } 00170 00173 unsigned int hankaku2zen(int hankaku) 00174 { 00175 static unsigned int z[64] = { 00176 0x2121,0x2123,0x2156,0x2157,0x2122,0x2126,0x2572,0x2521, 00177 0x2523,0x2525,0x2527,0x2529,0x2563,0x2565,0x2567,0x2543, 00178 0x213c,0x2522,0x2524,0x2526,0x2528,0x252a,0x252b,0x252d, 00179 0x252f,0x2531,0x2533,0x2535,0x2537,0x2539,0x253b,0x253d, 00180 0x253f,0x2541,0x2544,0x2546,0x2548,0x254a,0x254b,0x254c, 00181 0x254d,0x254e,0x254f,0x2552,0x2555,0x2558,0x255b,0x255e, 00182 0x255f,0x2560,0x2561,0x2562,0x2564,0x2566,0x2568,0x2569, 00183 0x256a,0x256b,0x256c,0x256d,0x256f,0x2573,0x212b,0x212c }; 00184 00185 if (hankaku < 0xa0 || hankaku > 0xdf) return 0; 00186 return z[hankaku - 0xa0]; 00187 } 00188 00191 unsigned int euc2sjis(unsigned int euc) 00192 { 00193 unsigned int jis; 00194 unsigned int hib, lob; 00195 00196 if ((euc & 0xff00) == 0x8e00) 00197 jis = hankaku2zen(euc & 0xff); 00198 else jis = euc & ~0x8080; 00199 00200 hib = (jis >> 8) & 0xff; 00201 lob = jis & 0xff; 00202 lob += (hib & 1) ? 0x1f : 0x7d; 00203 if (lob >= 0x7f) lob++; 00204 hib = ((hib - 0x21) >> 1) + 0x81; 00205 if (hib > 0x9f) hib += 0x40; 00206 00207 return (hib << 8) | lob; 00208 } 00209 00210 00213 QCString Translator::JapaneseEucToSjis( const QCString & sInput ) 00214 { 00215 QString result; 00216 int len = sInput.length(); 00217 int c1,c2,sj; 00218 00219 result.setUnicode(0, len); 00220 QChar* uc = (QChar*)result.unicode(); // const_cast 00221 const unsigned char * c = (const unsigned char *)(const char*)sInput; 00222 00223 for( int i=0; i<len;) 00224 { 00225 c1 = c[i]; 00226 00227 if( c1 == EOF ) break; 00228 00229 /* if MSB=0 then the character is ascii */ 00230 if(!( c1 & 0x80)) 00231 { 00232 uc[i] = c[i]; 00233 i=i+1; 00234 } 00235 else 00236 { 00237 c2 = c[i+1]; 00238 if( c2 == EOF ) break; 00239 sj = euc2sjis( (c1 << 8) + c2 ); 00240 uc[i] = sj >> 8; 00241 uc[i+1] = sj & 0xff; 00242 i+=2; 00243 } 00244 } 00245 00246 return result.latin1(); 00247 00248 }