w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

wtf.c (15114B)


      1 
      2 #include "wc.h"
      3 #include "wtf.h"
      4 #include "sjis.h"
      5 #include "big5.h"
      6 #include "hkscs.h"
      7 #include "johab.h"
      8 #include "jis.h"
      9 #include "viet.h"
     10 #include "gbk.h"
     11 #include "gb18030.h"
     12 #include "uhc.h"
     13 #ifdef USE_UNICODE
     14 #include "ucs.h"
     15 #include "utf8.h"
     16 #endif
     17 
     18 wc_uint8 WTF_WIDTH_MAP[ 0x100 ] = {
     19     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     20     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     21     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     22     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     23 
     24     1,2,1,2,1,1,1,2, 1,2,1,2,1,1,1,1, 0,0,0,0,0,0,0,0, 0,0,0,0,1,1,1,1,
     25     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     26     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     27     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     28 };
     29 
     30 wc_uint8 WTF_LEN_MAP[ 0x100 ] = {
     31     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     32     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     33     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     34     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     35 
     36     3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1,
     37     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     38     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     39     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     40 };
     41 
     42 wc_uint8 WTF_TYPE_MAP[ 0x100 ] = {
     43     1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
     44     0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
     45     0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
     46     0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,
     47 
     48     2,  0xA,2,  0xA, 2, 0x12,2,  0xA, 2,  0xA,2,  0xA, 0x20,0x20,0x20,0x20,
     49     4,  0xC,4,  0xC, 4, 0x20,4,  0xC, 4,  0xC,4,  0xC, 0x20,0x20,0x20,0x20,
     50  0x20,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
     51     2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
     52     2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
     53 };
     54 
     55 static wc_uint16 CCS_MAP[ 33 ] = {
     56     WC_CCS_A_CS94    >> 8, WC_CCS_A_CS94W    >> 8,
     57     WC_CCS_A_CS96    >> 8, WC_CCS_A_CS96W    >> 8,
     58     WC_CCS_A_CS942   >> 8, WC_CCS_A_UNKNOWN  >> 8,
     59     WC_CCS_A_PCS     >> 8, WC_CCS_A_PCSW     >> 8,
     60     WC_CCS_A_WCS16   >> 8, WC_CCS_A_WCS16W   >> 8,
     61     WC_CCS_A_WCS32   >> 8, WC_CCS_A_WCS32W   >> 8,
     62     0,                     0,
     63     0,                     0,
     64     WC_CCS_A_CS94_C  >> 8, WC_CCS_A_CS94W_C  >> 8,
     65     WC_CCS_A_CS96_C  >> 8, WC_CCS_A_CS96W_C  >> 8,
     66     WC_CCS_A_CS942_C >> 8, 0,
     67     WC_CCS_A_PCS_C   >> 8, WC_CCS_A_PCSW_C   >> 8,
     68     WC_CCS_A_WCS16_C >> 8, WC_CCS_A_WCS16W_C >> 8,
     69     WC_CCS_A_WCS32_C >> 8, WC_CCS_A_WCS32W_C >> 8,
     70     0,                     0,
     71     0,                     0,
     72     0,
     73 };
     74 
     75 wc_ccs wtf_gr_ccs = 0;
     76 static wc_ces wtf_major_ces = WC_CES_US_ASCII;
     77 static wc_status wtf_major_st;
     78 
     79 void
     80 wtf_init(wc_ces ces1, wc_ces ces2)
     81 {
     82     int i;
     83     wc_gset *gset;
     84 
     85     if (wc_check_ces(ces2))
     86 	wtf_major_ces = ces2;
     87 
     88     if (! wc_check_ces(ces1))
     89 	return;
     90     gset = WcCesInfo[WC_CES_INDEX(ces1)].gset;
     91     if (gset == NULL || gset[1].ccs == 0 ||
     92 	gset[1].ccs & (WC_CCS_A_WCS16|WC_CCS_A_WCS32))
     93 	return;
     94     wtf_gr_ccs = gset[1].ccs;
     95 
     96     if (WC_CCS_IS_WIDE(wtf_gr_ccs)) {
     97 	for (i = 0xa1; i <= 0xff; i++) {
     98 	    WTF_WIDTH_MAP[i] = 2;
     99 	    WTF_LEN_MAP[i] = 2;
    100 	    WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1W;
    101 	}
    102     } else {
    103 	for (i = 0xa1; i <= 0xff; i++) {
    104 	    WTF_WIDTH_MAP[i] = 1;
    105 	    WTF_LEN_MAP[i] = 1;
    106 	    WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1;
    107 	}
    108     }
    109 }
    110 
    111 /*
    112 int
    113 wtf_width(wc_uchar *p)
    114 {
    115     return (int)WTF_WIDTH_MAP[*p];
    116 }
    117 */
    118 
    119 int
    120 wtf_strwidth(wc_uchar *p)
    121 {
    122     int w = 0;
    123 
    124     while (*p) {
    125 	w += wtf_width(p);
    126 	p += WTF_LEN_MAP[*p];
    127     }
    128     return w;
    129 }
    130 
    131 /*
    132 size_t
    133 wtf_len1(wc_uchar *p)
    134 {
    135     return (size_t)WTF_LEN_MAP[*p];
    136 }
    137 */
    138 
    139 size_t
    140 wtf_len(wc_uchar *p)
    141 {
    142     wc_uchar *q = p;
    143 
    144     q += WTF_LEN_MAP[*q];
    145     while (*q && ! WTF_WIDTH_MAP[*q])
    146 	q += WTF_LEN_MAP[*q];
    147     return q - p;
    148 }
    149 
    150 /*
    151 int
    152 wtf_type(wc_uchar *p)
    153 {
    154     return (int)WTF_TYPE_MAP[*p];
    155 }
    156 */
    157 
    158 #define wcs16_to_wtf(c, p) \
    159     ((p)[0] = (((c) >> 14) & 0x03) | 0x80), \
    160     ((p)[1] = (((c) >>  7) & 0x7f) | 0x80), \
    161     ((p)[2] = ( (c)        & 0x7f) | 0x80)
    162 #define wcs32_to_wtf(c, p) \
    163     ((p)[0] = (((c) >> 28) & 0x0f) | 0x80), \
    164     ((p)[1] = (((c) >> 21) & 0x7f) | 0x80), \
    165     ((p)[2] = (((c) >> 14) & 0x7f) | 0x80), \
    166     ((p)[3] = (((c) >>  7) & 0x7f) | 0x80), \
    167     ((p)[4] = ( (c)        & 0x7f) | 0x80)
    168 #define wtf_to_wcs16(p) \
    169       ((wc_uint32)((p)[0] & 0x03) << 14) \
    170     | ((wc_uint32)((p)[1] & 0x7f) <<  7) \
    171     | ((wc_uint32)((p)[2] & 0x7f)      )
    172 #define wtf_to_wcs32(p) \
    173       ((wc_uint32)((p)[0] & 0x0f) << 28) \
    174     | ((wc_uint32)((p)[1] & 0x7f) << 21) \
    175     | ((wc_uint32)((p)[2] & 0x7f) << 14) \
    176     | ((wc_uint32)((p)[3] & 0x7f) <<  7) \
    177     | ((wc_uint32)((p)[4] & 0x7f)      )
    178 
    179 void
    180 wtf_push(Str os, wc_ccs ccs, wc_uint32 code)
    181 {
    182     wc_uchar s[8];
    183     wc_wchar_t cc, cc2;
    184     size_t n;
    185 
    186     if (ccs == WC_CCS_US_ASCII) {
    187 	Strcat_char(os, (char)(code & 0x7f));
    188 	return;
    189     }
    190     cc.ccs = ccs;
    191     cc.code = code;
    192     if (WcOption.pre_conv && !(cc.ccs & WC_CCS_A_UNKNOWN)) {
    193 	if ((ccs == WC_CCS_JOHAB || ccs == WC_CCS_JOHAB_1 ||
    194 		ccs == WC_CCS_JOHAB_2 || ccs == WC_CCS_JOHAB_3) &&
    195 		(wtf_major_ces == WC_CES_EUC_KR ||
    196 		wtf_major_ces == WC_CES_ISO_2022_KR)) {
    197 	    cc2 = wc_johab_to_ksx1001(cc);
    198 	    if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
    199 		cc = cc2;
    200 	} else if (ccs == WC_CCS_KS_X_1001 &&
    201 		wtf_major_ces == WC_CES_JOHAB) {
    202 	    cc2 = wc_ksx1001_to_johab(cc);
    203 	    if (!WC_CCS_IS_UNKNOWN(cc2.ccs))
    204 		cc = cc2;
    205 	}
    206 #ifdef USE_UNICODE
    207 	else if (WcOption.ucs_conv) {
    208 	    wc_bool fix_width_conv = WcOption.fix_width_conv;
    209 	    WcOption.fix_width_conv = WC_FALSE;
    210 	    wc_output_init(wtf_major_ces, &wtf_major_st);
    211 	    if (! wc_ces_has_ccs(WC_CCS_SET(ccs), &wtf_major_st)) {
    212 		cc2 = wc_any_to_any_ces(cc, &wtf_major_st);
    213 		if (cc2.ccs == WC_CCS_US_ASCII) {
    214 		    Strcat_char(os, (char)(cc2.code & 0x7f));
    215 		    return;
    216 		}
    217 		if (!WC_CCS_IS_UNKNOWN(cc2.ccs) &&
    218 			cc2.ccs != WC_CCS_CP1258_2 &&
    219 			cc2.ccs != WC_CCS_TCVN_5712_3)
    220 		    cc = cc2;
    221 	    }
    222 	    WcOption.fix_width_conv = fix_width_conv;
    223 	}
    224 #endif
    225     }
    226 
    227     switch (WC_CCS_TYPE(cc.ccs)) {
    228     case WC_CCS_A_CS94:
    229 	if (cc.ccs == wtf_gr_ccs) {
    230 	    s[0] = (cc.code & 0x7f) | 0x80;
    231 	    n = 1;
    232 	    break;
    233 	}
    234 	if (cc.ccs == WC_CCS_JIS_X_0201K && !WcOption.use_jisx0201k) {
    235 	    cc2 = wc_jisx0201k_to_jisx0208(cc);
    236 	    if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) {
    237 		wtf_push(os, cc2.ccs, cc2.code);
    238 		return;
    239 	    }
    240 	}
    241 	s[0] = WTF_C_CS94;
    242 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    243 	s[2] = (cc.code & 0x7f) | 0x80;
    244 	n = 3;
    245 	break;
    246     case WC_CCS_A_CS94W:
    247 	if (cc.ccs == wtf_gr_ccs) {
    248 	    s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
    249 	    s[1] = ( cc.code       & 0x7f) | 0x80;
    250 	    n = 2;
    251 	    break;
    252 	}
    253 	s[0] = WTF_C_CS94W;
    254 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    255 	s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
    256 	s[3] = ( cc.code       & 0x7f) | 0x80;
    257 	n = 4;
    258 	break;
    259     case WC_CCS_A_CS96:
    260 	if (WcOption.use_combining && wc_is_combining(cc))
    261 	    s[0] = WTF_C_CS96_C;
    262 	else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
    263 	    s[0] = (cc.code & 0x7f) | 0x80;
    264 	    n = 1;
    265 	    break;
    266 	} else
    267 	    s[0] = WTF_C_CS96;
    268 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    269 	s[2] = (cc.code & 0x7f) | 0x80;
    270 	n = 3;
    271 	break;
    272     case WC_CCS_A_CS96W:
    273 	if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
    274 	    s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
    275 	    s[1] = ( cc.code       & 0x7f) | 0x80;
    276 	    n = 2;
    277 	    break;
    278 	}
    279 	s[0] = WTF_C_CS96W;
    280 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    281 	s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
    282 	s[3] = ( cc.code       & 0x7f) | 0x80;
    283 	n = 4;
    284 	break;
    285     case WC_CCS_A_CS942:
    286 	if (cc.ccs == wtf_gr_ccs) {
    287 	    s[0] = (cc.code & 0x7f) | 0x80;
    288 	    n = 1;
    289 	    break;
    290 	}
    291 	s[0] = WTF_C_CS942;
    292 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    293 	s[2] = (cc.code & 0x7f) | 0x80;
    294 	n = 3;
    295 	break;
    296     case WC_CCS_A_PCS:
    297 	if (WcOption.use_combining && wc_is_combining(cc))
    298 	    s[0] = WTF_C_PCS_C;
    299 	else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) {
    300 	    s[0] = (cc.code & 0x7f) | 0x80;
    301 	    n = 1;
    302 	    break;
    303 	} else
    304 	    s[0] = WTF_C_PCS;
    305 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    306 	s[2] = (cc.code & 0x7f) | 0x80;
    307 	n = 3;
    308 	break;
    309     case WC_CCS_A_PCSW:
    310 	switch (cc.ccs) {
    311 	case WC_CCS_SJIS_EXT:
    312 	    cc = wc_sjis_ext_to_cs94w(cc);
    313 	    break;
    314 	case WC_CCS_GBK:
    315 	    cc = wc_gbk_to_cs128w(cc);
    316 	    break;
    317 	case WC_CCS_GBK_EXT:
    318 	    cc = wc_gbk_ext_to_cs128w(cc);
    319 	    break;
    320 	case WC_CCS_BIG5:
    321 	    cc = wc_big5_to_cs94w(cc);
    322 	    break;
    323 	case WC_CCS_HKSCS:
    324 	    cc = wc_hkscs_to_cs128w(cc);
    325 	    break;
    326 	case WC_CCS_JOHAB:
    327 	    cc = wc_johab_to_cs128w(cc);
    328 	    break;
    329 	case WC_CCS_UHC:
    330 	    cc = wc_uhc_to_cs128w(cc);
    331 	    break;
    332 	}
    333 	if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) {
    334 	    s[0] = ((cc.code >> 8) & 0x7f) | 0x80;
    335 	    s[1] = ( cc.code       & 0x7f) | 0x80;
    336 	    n = 2;
    337 	    break;
    338 	}
    339 	s[0] = WTF_C_PCSW;
    340 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    341 	s[2] = ((cc.code >> 8) & 0x7f) | 0x80;
    342 	s[3] = ( cc.code       & 0x7f) | 0x80;
    343 	n = 4;
    344 	break;
    345     case WC_CCS_A_WCS16:
    346 	s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS16W : WTF_C_WCS16)
    347 	     | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
    348 	wcs16_to_wtf(cc.code, s + 1);
    349 	s[1] |= (WC_CCS_INDEX(cc.ccs) << 2);
    350 	n = 4;
    351 	break;
    352     case WC_CCS_A_WCS32:
    353 	s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS32W : WTF_C_WCS32)
    354 	     | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0);
    355 	wcs32_to_wtf(cc.code, s + 1);
    356 	s[1] |= (WC_CCS_INDEX(cc.ccs) << 4);
    357 	n = 6;
    358 	break;
    359     default:
    360 	s[0] = WTF_C_UNKNOWN;
    361 	s[1] = WC_CCS_INDEX(cc.ccs) | 0x80;
    362 	s[2] = (cc.code & 0x7f) | 0x80;
    363 	n = 3;
    364 	break;
    365     }
    366     Strcat_charp_n(os, (char *)s, n);
    367 }
    368 
    369 void
    370 wtf_push_unknown(Str os, wc_uchar *p, size_t len)
    371 {
    372     for (; len--; p++) {
    373 	if (*p & 0x80)
    374 	    wtf_push(os, WC_CCS_UNKNOWN, *p);
    375 	else
    376 	    Strcat_char(os, (char)*p);
    377     }
    378 }
    379 
    380 wc_wchar_t
    381 wtf_parse1(wc_uchar **p)
    382 {
    383     wc_uchar *q = *p;
    384     wc_wchar_t cc;
    385 
    386     if (*q < 0x80) {
    387 	cc.ccs = WC_CCS_US_ASCII;
    388 	cc.code = *(q++);
    389     } else if (*q > 0xa0) {
    390 	cc.ccs = wtf_gr_ccs;
    391 	if (WC_CCS_IS_WIDE(cc.ccs)) {
    392 	    cc.code = ((wc_uint32)*q << 8) | *(q+1);
    393 	    q += 2;
    394 	} else
    395 	    cc.code = *(q++);
    396     } else {
    397 	cc.ccs = (wc_uint32)CCS_MAP[*(q++) - 0x80] << 8;
    398 	switch (WC_CCS_TYPE(cc.ccs)) {
    399 	case WC_CCS_A_CS94:
    400 	case WC_CCS_A_CS96:
    401 	case WC_CCS_A_CS942:
    402 	case WC_CCS_A_PCS:
    403 	case WC_CCS_A_UNKNOWN:
    404 	    cc.ccs |= *(q++) & 0x7f;
    405 	    cc.code = *(q++);
    406 	    break;
    407 	case WC_CCS_A_CS94W:
    408 	case WC_CCS_A_CS96W:
    409 	case WC_CCS_A_PCSW:
    410 	    cc.ccs |= *(q++) & 0x7f;
    411 	    cc.code = ((wc_uint32)*q << 8) | *(q+1);
    412 	    q += 2;
    413 	    break;
    414 	case WC_CCS_A_WCS16:
    415 	case WC_CCS_A_WCS16W:
    416 	    cc.ccs |= (*q & 0x7c) >> 2;
    417 	    cc.code = wtf_to_wcs16(q);
    418 	    q += 3;
    419 	    break;
    420 	case WC_CCS_A_WCS32:
    421 	case WC_CCS_A_WCS32W:
    422 	    cc.ccs |= (*q & 0x70) >> 4;
    423 	    cc.code = wtf_to_wcs32(q);
    424 	    q += 5;
    425 	    break;
    426 	default:
    427 	/* case 0: */
    428 	    cc.ccs = WC_CCS_US_ASCII;
    429 	    cc.code = (wc_uint32)' ';
    430 	    break;
    431 	}
    432     }
    433 
    434     *p = q;
    435     switch (cc.ccs) {
    436     case WC_CCS_SJIS_EXT_1:
    437     case WC_CCS_SJIS_EXT_2:
    438 	return wc_cs94w_to_sjis_ext(cc);
    439     case WC_CCS_GBK_1:
    440     case WC_CCS_GBK_2:
    441 	return wc_cs128w_to_gbk(cc);
    442     case WC_CCS_GBK_EXT_1:
    443     case WC_CCS_GBK_EXT_2:
    444 	return wc_cs128w_to_gbk_ext(cc);
    445     case WC_CCS_BIG5_1:
    446     case WC_CCS_BIG5_2:
    447 	return wc_cs94w_to_big5(cc);
    448     case WC_CCS_HKSCS_1:
    449     case WC_CCS_HKSCS_2:
    450 	return wc_cs128w_to_hkscs(cc);
    451     case WC_CCS_JOHAB_1:
    452     case WC_CCS_JOHAB_2:
    453     case WC_CCS_JOHAB_3:
    454 	return wc_cs128w_to_johab(cc);
    455     case WC_CCS_UHC_1:
    456     case WC_CCS_UHC_2:
    457 	return wc_cs128w_to_uhc(cc);
    458     }
    459     return cc;
    460 }
    461 
    462 wc_wchar_t
    463 wtf_parse(wc_uchar **p)
    464 {
    465     wc_uchar *q;
    466     wc_wchar_t cc, cc2;
    467     wc_uint32 ucs, ucs2;
    468 
    469     if (**p < 0x80) {
    470 	cc.ccs = WC_CCS_US_ASCII;
    471 	cc.code = *((*p)++);
    472     } else
    473 	cc = wtf_parse1(p);
    474     if ((! WcOption.use_combining) || WTF_WIDTH_MAP[**p])
    475 	return cc;
    476 
    477     q = *p;
    478     cc2 = wtf_parse1(&q);
    479     if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_CP1258_1) &&
    480 	WC_CCS_SET(cc2.ccs) == WC_CCS_CP1258_1) {
    481 	cc2.code = wc_cp1258_precompose(cc.code, cc2.code);
    482 	if (cc2.code) {
    483 	    cc2.ccs = WC_CCS_CP1258_2;
    484 	    *p = q;
    485 	    return cc2;
    486 	}
    487     } else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_TCVN_5712_1) &&
    488 	WC_CCS_SET(cc2.ccs) == WC_CCS_TCVN_5712_1) {
    489 	cc2.code = wc_tcvn5712_precompose(cc.code, cc2.code);
    490 	if (cc2.code) {
    491 	    cc2.ccs = WC_CCS_TCVN_5712_3;
    492 	    *p = q;
    493 	    return cc2;
    494 	}
    495     }
    496 #ifdef USE_UNICODE
    497     else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_ISO_8859_1 ||
    498 	WC_CCS_IS_UNICODE(cc.ccs)) && WC_CCS_IS_UNICODE(cc2.ccs)) {
    499 	while (1) {
    500 	    ucs = (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
    501 		? wc_ucs_tag_to_ucs(cc.code) : cc.code;
    502 	    ucs2 = (WC_CCS_SET(cc2.ccs) == WC_CCS_UCS_TAG)
    503 		? wc_ucs_tag_to_ucs(cc2.code) : cc2.code;
    504 	    ucs = wc_ucs_precompose(ucs, ucs2);
    505 	    if (ucs == WC_C_UCS4_ERROR)
    506 		break;
    507 	    if (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG)
    508 		cc.code = wc_ucs_to_ucs_tag(ucs, wc_ucs_tag_to_tag(cc.code));
    509 	    else {
    510 		cc.ccs = wc_ucs_to_ccs(ucs);
    511 		cc.code = ucs;
    512 	    }
    513 	    *p = q;
    514 	    if (! WTF_WIDTH_MAP[*q])
    515 		break;
    516 	    cc2 = wtf_parse1(&q);
    517 	    if (! WC_CCS_IS_UNICODE(cc2.ccs))
    518 		break;
    519 	}
    520     }
    521 #endif
    522     return cc;
    523 }
    524 
    525 wc_ccs
    526 wtf_get_ccs(wc_uchar *p)
    527 {
    528    return wtf_parse1(&p).ccs;
    529 }
    530 
    531 wc_uint32
    532 wtf_get_code(wc_uchar *p)
    533 {
    534    return wtf_parse1(&p).code;
    535 }
    536 
    537 wc_bool
    538 wtf_is_hangul(wc_uchar *p)
    539 {
    540     if (*p > 0xa0)
    541 	return (wtf_gr_ccs == WC_CCS_KS_X_1001 || wtf_gr_ccs == WC_CCS_JOHAB_1);
    542     else if (*p == WTF_C_CS94W)
    543 	return ((*(p + 1) & 0x7f) == WC_F_KS_X_1001);
    544     else if (*p == WTF_C_PCSW) {
    545 	wc_uchar f = *(p + 1) & 0x7f;
    546 	return (f == WC_F_JOHAB_1 || f == WC_F_JOHAB_2 || f == WC_F_JOHAB_3 ||
    547 		f == WC_F_UHC_1 || f == WC_F_UHC_2);
    548     }
    549 #ifdef USE_UNICODE
    550     else if (*p == WTF_C_WCS16W) {
    551 	wc_uchar f = (*(++p) & 0x7f) >> 2;
    552 	if (f == WC_F_UCS2)
    553 	    return wc_is_ucs_hangul(wtf_to_wcs16(p));
    554     } else if (*p == WTF_C_WCS32W) {
    555 	wc_uchar f = (*(++p) & 0x7f) >> 4;
    556 	if (f == WC_F_UCS_TAG)
    557 	    return wc_is_ucs_hangul(wc_ucs_tag_to_ucs(wtf_to_wcs32(p)));
    558     }
    559 #endif
    560     return WC_FALSE;
    561 }
    562 
    563 char *
    564 wtf_conv_fit(char *s, wc_ces ces)
    565 {
    566     wc_uchar *p;
    567     Str os;
    568     wc_wchar_t cc;
    569     wc_ces major_ces;
    570     wc_bool pre_conv, ucs_conv;
    571 
    572     if (ces == WC_CES_WTF || ces == WC_CES_US_ASCII)
    573 	return s;
    574 
    575     for (p = (wc_uchar *)s; *p && *p < 0x80; p++)
    576 	;
    577     if (! *p)
    578 	return s;
    579 
    580     os = Strnew_size(strlen(s));
    581     if (p > (wc_uchar *)s)
    582 	Strcopy_charp_n(os, s, (int)(p - (wc_uchar *)s));
    583 
    584     major_ces = wtf_major_ces;
    585     pre_conv = WcOption.pre_conv;
    586     ucs_conv = WcOption.ucs_conv;
    587     wtf_major_ces = ces;
    588     WcOption.pre_conv = WC_TRUE;
    589     WcOption.ucs_conv = WC_TRUE;
    590     while (*p) {
    591 	cc = wtf_parse1(&p);
    592 	wtf_push(os, cc.ccs, cc.code);
    593     }
    594     wtf_major_ces = major_ces;
    595     WcOption.pre_conv = pre_conv;
    596     WcOption.ucs_conv = ucs_conv;
    597     return os->ptr;
    598 }