w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

gbk.c (5612B)


      1 
      2 #include "wc.h"
      3 #include "gbk.h"
      4 #include "search.h"
      5 #include "wtf.h"
      6 #ifdef USE_UNICODE
      7 #include "ucs.h"
      8 #endif
      9 
     10 #include "map/gb2312_gbk.map"
     11 
     12 #define C0 WC_GBK_MAP_C0
     13 #define GL WC_GBK_MAP_GL
     14 #define C1 WC_GBK_MAP_C1
     15 #define LB WC_GBK_MAP_LB
     16 #define UB WC_GBK_MAP_UB
     17 #define C80 WC_GBK_MAP_80
     18 
     19 wc_uint8 WC_GBK_MAP[ 0x100 ] = {
     20     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     21     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     22     GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
     23     GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
     24     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     25     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     26     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     27     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
     28 
     29     C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     30     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     31     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     32     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     33     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     34     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     35     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     36     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
     37 };
     38 
     39 wc_ccs
     40 wc_gb2312_or_gbk(wc_uint16 code) {
     41     return wc_map_range_search(code,
     42 	gb2312_gbk_map, N_gb2312_gbk_map)
     43 	? WC_CCS_GBK : WC_CCS_GB_2312;
     44 }
     45 
     46 wc_wchar_t
     47 wc_gbk_to_cs128w(wc_wchar_t cc)
     48 {
     49     cc.code = WC_GBK_N(cc.code);
     50     if (cc.code < 0x4000)
     51 	cc.ccs = WC_CCS_GBK_1;
     52     else {
     53 	cc.ccs = WC_CCS_GBK_2;
     54 	cc.code -= 0x4000;
     55     }
     56     cc.code = WC_N_CS128W(cc.code);
     57     return cc;
     58 }
     59 
     60 wc_wchar_t
     61 wc_cs128w_to_gbk(wc_wchar_t cc)
     62 {
     63     cc.code = WC_CS128W_N(cc.code);
     64     if (cc.ccs == WC_CCS_GBK_2)
     65 	cc.code += 0x4000;
     66     cc.ccs = WC_CCS_GBK;
     67     cc.code = WC_N_GBK(cc.code);
     68     return cc;
     69 }
     70 
     71 wc_uint32
     72 wc_gbk_to_N(wc_uint32 c)
     73 {
     74     if (c <= 0xA1A0)	/* 0x8140 - 0xA1A0 */
     75 	return WC_GBK_N(c);
     76     if (c <= 0xA2AA)	/* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */
     77 	return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E;
     78     if (c <= 0xA6A0)	/* 0xA240 - 0xA6A0 */
     79 	return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A;
     80     if (c <= 0xA6F5)	/* 0xA6E0 - 0xA6F5 */
     81 	return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F;
     82     if (c <= 0xA8A0)	/* 0xA7A0 - 0xA8A0 */
     83 	return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16;
     84     if (c <= 0xA8C0)	/* 0xA8BB - 0xA8C0 */
     85 	return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A;
     86 			/* 0xA940 - 0xFEA0 */
     87     return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06;
     88 }
     89 
     90 Str
     91 wc_conv_from_gbk(Str is, wc_ces ces)
     92 {
     93     Str os;
     94     wc_uchar *sp = (wc_uchar *)is->ptr;
     95     wc_uchar *ep = sp + is->length;
     96     wc_uchar *p;
     97     int state = WC_GBK_NOSTATE;
     98     wc_uint32 gbk;
     99 
    100     for (p = sp; p < ep && *p < 0x80; p++) 
    101 	;
    102     if (p == ep)
    103 	return is;
    104     os = Strnew_size(is->length);
    105     if (p > sp)
    106 	Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
    107 
    108     for (; p < ep; p++) {
    109 	switch (state) {
    110 	case WC_GBK_NOSTATE:
    111 	    switch (WC_GBK_MAP[*p]) {
    112 	    case UB:
    113 		state = WC_GBK_MBYTE1;
    114 		break;
    115 	    case C80:
    116 		wtf_push(os, WC_CCS_GBK_80, *p);
    117 		break;
    118 	    case C1:
    119 		wtf_push_unknown(os, p, 1);
    120 		break;
    121 	    default:
    122 		Strcat_char(os, (char)*p);
    123 		break;
    124 	    }
    125 	    break;
    126 	case WC_GBK_MBYTE1:
    127 	    if (WC_GBK_MAP[*p] & LB) {
    128 		gbk = ((wc_uint32)*(p-1) << 8) | *p;
    129 		if (*(p-1) >= 0xA1 && *p >= 0xA1)
    130 		    wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
    131 		else
    132 		    wtf_push(os, WC_CCS_GBK, gbk);
    133 	    } else
    134 		wtf_push_unknown(os, p-1, 2);
    135 	    state = WC_GBK_NOSTATE;
    136 	    break;
    137 	}
    138     }
    139     switch (state) {
    140     case WC_GBK_MBYTE1:
    141 	wtf_push_unknown(os, p-1, 1);
    142 	break;
    143     }
    144     return os;
    145 }
    146 
    147 void
    148 wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st)
    149 {
    150   while (1) {
    151     switch (cc.ccs) {
    152     case WC_CCS_US_ASCII:
    153 	Strcat_char(os, (char)cc.code);
    154 	return;
    155     case WC_CCS_GB_2312:
    156 	Strcat_char(os, (char)((cc.code >> 8) | 0x80));
    157 	Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
    158 	return;
    159     case WC_CCS_GBK_80:
    160 	Strcat_char(os, (char)(cc.code | 0x80));
    161 	return;
    162     case WC_CCS_GBK_1:
    163     case WC_CCS_GBK_2:
    164 	cc = wc_cs128w_to_gbk(cc);
    165     case WC_CCS_GBK:
    166 	Strcat_char(os, (char)(cc.code >> 8));
    167 	Strcat_char(os, (char)(cc.code & 0xff));
    168 	return;
    169     case WC_CCS_UNKNOWN_W:
    170 	if (!WcOption.no_replace)
    171 	    Strcat_charp(os, WC_REPLACE_W);
    172 	return;
    173     case WC_CCS_UNKNOWN:
    174 	if (!WcOption.no_replace)
    175 	    Strcat_charp(os, WC_REPLACE);
    176 	return;
    177     default:
    178 #ifdef USE_UNICODE
    179 	if (WcOption.ucs_conv)
    180 	    cc = wc_any_to_any_ces(cc, st);
    181 	else
    182 #endif
    183 	    cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    184 	continue;
    185     }
    186   }
    187 }
    188 
    189 Str
    190 wc_char_conv_from_gbk(wc_uchar c, wc_status *st)
    191 {
    192     static Str os;
    193     static wc_uchar gbku;
    194     wc_uint32 gbk;
    195 
    196     if (st->state == -1) {
    197 	st->state = WC_GBK_NOSTATE;
    198 	os = Strnew_size(8);
    199     }
    200 
    201     switch (st->state) {
    202     case WC_GBK_NOSTATE:
    203 	switch (WC_GBK_MAP[c]) {
    204 	case UB:
    205 	    gbku = c;
    206 	    st->state = WC_GBK_MBYTE1;
    207 	    return NULL;
    208 	case C80:
    209 	    wtf_push(os, WC_CCS_GBK_80, c);
    210 	    break;
    211 	case C1:
    212 	    break;
    213 	default:
    214 	    Strcat_char(os, (char)c);
    215 	    break;
    216 	}
    217 	break;
    218     case WC_GBK_MBYTE1:
    219 	if (WC_GBK_MAP[c] & LB) {
    220 	    gbk = ((wc_uint32)gbku << 8) | c;
    221 	    if (gbku >= 0xA1 && c >= 0xA1)
    222 		wtf_push(os, wc_gb2312_or_gbk(gbk), gbk);
    223 	    else
    224 		wtf_push(os, WC_CCS_GBK, gbk);
    225 	}
    226 	break;
    227     }
    228     st->state = -1;
    229     return os;
    230 }