w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

big5.c (4157B)


      1 
      2 #include "wc.h"
      3 #include "big5.h"
      4 #include "search.h"
      5 #include "wtf.h"
      6 #ifdef USE_UNICODE
      7 #include "ucs.h"
      8 #endif
      9 
     10 #define C0 WC_BIG5_MAP_C0
     11 #define GL WC_BIG5_MAP_GL
     12 #define C1 WC_BIG5_MAP_C1
     13 #define LB WC_BIG5_MAP_LB
     14 #define UB WC_BIG5_MAP_UB
     15 
     16 wc_uint8 WC_BIG5_MAP[ 0x100 ] = {
     17     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     18     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     19     GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
     20     GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
     21     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     22     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     23     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
     24     LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
     25 
     26     C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
     27     C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
     28     C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     29     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     30     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     31     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     32     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
     33     UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
     34 };
     35 
     36 wc_wchar_t
     37 wc_big5_to_cs94w(wc_wchar_t cc)
     38 {
     39     cc.code = WC_BIG5_N(cc.code);
     40     if (cc.code < WC_C_BIG5_2_BASE)
     41 	cc.ccs = WC_CCS_BIG5_1;
     42     else {
     43 	cc.ccs = WC_CCS_BIG5_2;
     44 	cc.code -= WC_C_BIG5_2_BASE;
     45     }
     46     cc.code = WC_N_CS94W(cc.code);
     47     return cc;
     48 }
     49 
     50 wc_wchar_t
     51 wc_cs94w_to_big5(wc_wchar_t cc)
     52 {
     53     cc.code = WC_CS94W_N(cc.code);
     54     if (cc.ccs == WC_CCS_BIG5_2)
     55 	cc.code += WC_C_BIG5_2_BASE;
     56     cc.code = WC_N_BIG5(cc.code);
     57     cc.ccs = WC_CCS_BIG5;
     58     return cc;
     59 }
     60 
     61 Str
     62 wc_conv_from_big5(Str is, wc_ces ces)
     63 {
     64     Str os;
     65     wc_uchar *sp = (wc_uchar *)is->ptr;
     66     wc_uchar *ep = sp + is->length;
     67     wc_uchar *p;
     68     int state = WC_BIG5_NOSTATE;
     69 
     70     for (p = sp; p < ep && *p < 0x80; p++) 
     71 	;
     72     if (p == ep)
     73 	return is;
     74     os = Strnew_size(is->length);
     75     if (p > sp)
     76 	Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
     77 
     78     for (; p < ep; p++) {
     79 	switch (state) {
     80 	case WC_BIG5_NOSTATE:
     81 	    switch (WC_BIG5_MAP[*p]) {
     82 	    case UB:
     83 		state = WC_BIG5_MBYTE1;
     84 		break;
     85 	    case C1:
     86 		wtf_push_unknown(os, p, 1);
     87 		break;
     88 	    default:
     89 		Strcat_char(os, (char)*p);
     90 		break;
     91 	    }
     92 	    break;
     93 	case WC_BIG5_MBYTE1:
     94 	    if (WC_BIG5_MAP[*p] & LB)
     95 		wtf_push(os, WC_CCS_BIG5, ((wc_uint32)*(p-1) << 8) | *p);
     96 	    else
     97 		wtf_push_unknown(os, p-1, 2);
     98 	    state = WC_BIG5_NOSTATE;
     99 	    break;
    100 	}
    101     }
    102     switch (state) {
    103     case WC_BIG5_MBYTE1:
    104 	wtf_push_unknown(os, p-1, 1);
    105 	break;
    106     }
    107     return os;
    108 }
    109 
    110 void
    111 wc_push_to_big5(Str os, wc_wchar_t cc, wc_status *st)
    112 {
    113   while (1) {
    114     switch (cc.ccs) {
    115     case WC_CCS_US_ASCII:
    116 	Strcat_char(os, (char)cc.code);
    117 	return;
    118     case WC_CCS_BIG5_1:
    119     case WC_CCS_BIG5_2:
    120 	cc = wc_cs94w_to_big5(cc);
    121     case WC_CCS_BIG5:
    122 	Strcat_char(os, (char)(cc.code >> 8));
    123 	Strcat_char(os, (char)(cc.code & 0xff));
    124 	return;
    125     case WC_CCS_UNKNOWN_W:
    126 	if (!WcOption.no_replace)
    127 	    Strcat_charp(os, WC_REPLACE_W);
    128 	return;
    129     case WC_CCS_UNKNOWN:
    130 	if (!WcOption.no_replace)
    131 	    Strcat_charp(os, WC_REPLACE);
    132 	return;
    133     default:
    134 #ifdef USE_UNICODE
    135 	if (WcOption.ucs_conv)
    136 	    cc = wc_any_to_any_ces(cc, st);
    137 	else
    138 #endif
    139 	    cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    140 	continue;
    141     }
    142   }
    143 }
    144 
    145 Str
    146 wc_char_conv_from_big5(wc_uchar c, wc_status *st)
    147 {
    148     static Str os;
    149     static wc_uchar big5u;
    150 
    151     if (st->state == -1) {
    152 	st->state = WC_BIG5_NOSTATE;
    153 	os = Strnew_size(8);
    154     }
    155 
    156     switch (st->state) {
    157     case WC_BIG5_NOSTATE:
    158 	switch (WC_BIG5_MAP[c]) {
    159 	case UB:
    160 	    big5u = c;
    161 	    st->state = WC_BIG5_MBYTE1;
    162 	    return NULL;
    163 	case C1:
    164 	    break;
    165 	default:
    166 	    Strcat_char(os, (char)c);
    167 	    break;
    168 	}
    169 	break;
    170     case WC_BIG5_MBYTE1:
    171 	if (WC_BIG5_MAP[c] & LB)
    172 	    wtf_push(os, WC_CCS_BIG5, ((wc_uint32)big5u << 8) | c);
    173 	break;
    174     }
    175     st->state = -1;
    176     return os;
    177 }