translator.cpp Source File | Doxygen Source Document

00001 
00009 #include "translator.h"
00010 
00012 const char Translator::Win1250ToISO88592Tab[] = 
00013 {
00014   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00015   '\x88', '\x89', '\xA9', '\x8B', '\xA6', '\xAB', '\xAE', '\xAC',
00016   '\x90', '\x91', '\x92', '\x93', '\x94', '\x2E', '\x96', '\x97',
00017   '\x98', '\x99', '\xB9', '\x9B', '\xB6', '\xBB', '\xBE', '\xBC',
00018   '\xA0', '\x20', '\x20', '\xA3', '\xA4', '\xA1', '\xA6', '\xA7',
00019   '\x22', '\xA9', '\xAA', '\x3C', '\xAC', '\x2D', '\xAE', '\xAF',
00020   '\x2E', '\x2B', '\x20', '\xB3', '\x27', '\x75', '\xB6', '\xB7',
00021   '\x20', '\xB1', '\xBA', '\x3E', '\xA5', '\x22', '\xB5', '\xBF',
00022   '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
00023   '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',
00024   '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',
00025   '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',
00026   '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
00027   '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',
00028   '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\x2D',
00029   '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',
00030   '\0'
00031 };
00032 
00033 
00035 const char Translator::ISO88592ToWin1250Tab[] = {
00036   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
00037   '\x88', '\x89', '\x8A', '\x8B', '\x8C', '\x8D', '\x8E', '\x8F',
00038   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
00039   '\x98', '\x99', '\x9A', '\x9B', '\x9C', '\x9D', '\x9E', '\x9F',
00040   '\xA0', '\xA5', '\xA2', '\xA3', '\xA4', '\xBC', '\x8C', '\xA7',
00041   '\xA8', '\x8A', '\xAA', '\x8D', '\x8F', '\xAD', '\x8E', '\xAF',
00042   '\xB0', '\xB9', '\xB2', '\xB3', '\xB4', '\xBE', '\x9C', '\xB7',
00043   '\xB8', '\x9A', '\xBA', '\x9D', '\x9F', '\xBD', '\x9E', '\xBF',
00044   '\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\xC6', '\xC7',
00045   '\xC8', '\xC9', '\xCA', '\xCB', '\xCC', '\xCD', '\xCE', '\xCF',
00046   '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD7',
00047   '\xD8', '\xD9', '\xDA', '\xDB', '\xDC', '\xDD', '\xDE', '\xDF',
00048   '\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\xE6', '\xE7',
00049   '\xE8', '\xE9', '\xEA', '\xEB', '\xEC', '\xED', '\xEE', '\xEF',
00050   '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF7',
00051   '\xF8', '\xF9', '\xFA', '\xFB', '\xFC', '\xFD', '\xFE', '\xFF',
00052   '\0'
00053 };
00054 
00055 
00057 const unsigned char Translator::Koi8RToWindows1251Tab[128] =
00058 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
00059   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
00060   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
00061   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
00062   254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
00063   239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
00064   222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
00065   207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218 
00066 };
00067 
00068 
00070 const unsigned char Translator::Windows1251ToKoi8RTab[128] =
00071 { 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
00072   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
00073   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
00074   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
00075   225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
00076   242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
00077   193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
00078   210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209
00079 };
00080 
00082 /* The method was designed initially for translator_cz.h. 
00083  * It is used for on-line encoding conversion related to
00084  * conditional compilation in Unix/MS Windows environments
00085  * (both use different encoding).  Later, the translator_hr.h
00086  * (by Boris Bralo) used and improved the same style. As the
00087  * method with the translation table was the same, the
00088  * decision to move it to this base class was made. The same
00089  * holds for ISO88592ToWin1250() method. 
00090  * 
00091  * Alexandr Chelpanov used the same approach for
00092  * Koi8RToWindows1251() and Windows1251ToKoi8R() methods.  Notice,
00093  * that he uses Unicode tables.
00094  * 
00095  * It is recommended for possibly other similar methods in future.
00096  */
00097 QCString Translator::Win1250ToISO88592(const QCString & sInput)
00098 {
00099   // The conversion table for characters >127
00100   // 
00101   
00102   QCString result;
00103   int len = sInput.length();
00104 
00105   for (int i = 0; i < len; ++i)
00106   {
00107     unsigned int c = sInput[i];  
00108     result += (c > 127) ? Win1250ToISO88592Tab[c & 0x7F] : c;
00109   }
00110   return result;
00111 }
00112 
00113 
00115 /* See the comments of the Win1250ToISO88592() method for details. */
00116 QCString Translator::ISO88592ToWin1250(const QCString & sInput)
00117 {
00118   // The conversion table for characters >127
00119   // 
00120   QCString result;
00121   int len = sInput.length();
00122 
00123   for (int i = 0; i < len; ++i)
00124   {
00125     unsigned int c = sInput[i];  
00126     result += (c > 127) ? ISO88592ToWin1250Tab[c & 0x7F] : c;
00127   }
00128   return result;
00129 }
00130 
00131 
00133 /* The method was designed initially for translator_cz.h. 
00134    It is used for on-line encoding conversion related to conditional
00135    compilation in Unix/MS Windows environments (both use different
00136    encoding). Encoding table got from QT:qtextcodec.cpp
00137  */
00138 QCString Translator::Koi8RToWindows1251( const QCString & sInput )
00139 {
00140 
00141   QCString result(sInput);
00142   int len = sInput.length();
00143 
00144   const unsigned char * c = (const unsigned char *)(const char*)sInput;
00145   unsigned char *dc = (unsigned char*)(const char*)result;
00146   for( int i=0; i<len; i++ ) {
00147     if ( c[i] > 127 )
00148       dc[i] = Koi8RToWindows1251Tab[c[i]-128];
00149   }
00150   return result;
00151 }
00152 
00153 
00155 /* See the comments of the Koi8RToWindows1251() method for details.
00156    Encoding table got from QT:qtextcodec.cpp */
00157 QCString Translator::Windows1251ToKoi8R( const QCString & sInput )
00158 {
00159   QCString result(sInput);
00160   int len = sInput.length();
00161 
00162   const unsigned char * c = (const unsigned char *)(const char*)sInput;
00163   unsigned char *dc = (unsigned char*)(const char*)result;
00164   for( int i=0; i<len; i++ ) {
00165     if ( c[i] > 127 )
00166       dc[i] = Windows1251ToKoi8RTab[c[i]-128];
00167   }
00168   return result;
00169 }
00170 
/* Looks up the 16-bit code for a byte in the range 0xa0..0xdf.
 *
 * euc2sjis() calls this with the second byte of a 0x8e-prefixed EUC pair
 * and treats the result as the JIS code to convert.
 *
 * \param hankaku byte value to look up
 * \return the table entry for \a hankaku, or 0 when \a hankaku lies
 *         outside 0xa0..0xdf
 */
unsigned int hankaku2zen(int hankaku)
{
    const int firstCode = 0xa0;
    const int lastCode  = 0xdf;

    // Read-only lookup data: one entry per code in firstCode..lastCode.
    static const unsigned int table[lastCode - firstCode + 1] = {
        0x2121,0x2123,0x2156,0x2157,0x2122,0x2126,0x2572,0x2521,
        0x2523,0x2525,0x2527,0x2529,0x2563,0x2565,0x2567,0x2543,
        0x213c,0x2522,0x2524,0x2526,0x2528,0x252a,0x252b,0x252d,
        0x252f,0x2531,0x2533,0x2535,0x2537,0x2539,0x253b,0x253d,
        0x253f,0x2541,0x2544,0x2546,0x2548,0x254a,0x254b,0x254c,
        0x254d,0x254e,0x254f,0x2552,0x2555,0x2558,0x255b,0x255e,
        0x255f,0x2560,0x2561,0x2562,0x2564,0x2566,0x2568,0x2569,
        0x256a,0x256b,0x256c,0x256d,0x256f,0x2573,0x212b,0x212c };

    if (hankaku >= firstCode && hankaku <= lastCode)
    {
        return table[hankaku - firstCode];
    }
    return 0;   // no entry for this value
}
00188 
00191 unsigned int euc2sjis(unsigned int euc)
00192 {
00193     unsigned int jis;
00194     unsigned int hib, lob;
00195 
00196     if ((euc & 0xff00) == 0x8e00)
00197         jis = hankaku2zen(euc & 0xff);
00198     else jis = euc & ~0x8080;
00199     
00200     hib = (jis >> 8) & 0xff;
00201     lob = jis & 0xff;
00202     lob += (hib & 1) ? 0x1f : 0x7d;
00203     if (lob >= 0x7f) lob++;
00204     hib = ((hib - 0x21) >> 1) + 0x81;
00205     if (hib > 0x9f) hib += 0x40;
00206 
00207     return (hib << 8) | lob;
00208 }
00209 
00210 
00213 QCString Translator::JapaneseEucToSjis( const QCString & sInput )
00214 {
00215   QString result;
00216   int len = sInput.length();
00217   int c1,c2,sj;
00218 
00219   result.setUnicode(0, len);
00220   QChar* uc = (QChar*)result.unicode(); // const_cast
00221   const unsigned char * c = (const unsigned char *)(const char*)sInput;
00222   
00223   for( int i=0; i<len;)
00224     {
00225       c1 = c[i];
00226 
00227       if( c1 == EOF ) break;
00228       
00229       /* if MSB=0 then the character is ascii */
00230       if(!( c1 & 0x80))
00231         {
00232           uc[i] = c[i];
00233           i=i+1;
00234         }
00235       else
00236         {
00237           c2 = c[i+1];
00238           if( c2 == EOF ) break;
00239           sj     = euc2sjis( (c1 << 8) + c2 );
00240           uc[i]   = sj >> 8;
00241           uc[i+1] = sj & 0xff;
00242           i+=2;
00243         }
00244     }
00245 
00246   return result.latin1();
00247 
00248 }