w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

uhc.c (4875B)


      1 
      2 #include "wc.h"
      3 #include "uhc.h"
      4 #include "wtf.h"
      5 #ifdef USE_UNICODE
      6 #include "ucs.h"
      7 #endif
      8 
/*
 * Short aliases for the byte-class constants (WC_UHC_MAP_*, declared
 * outside this file) so that the table below and the switch statements
 * later in this file stay compact.
 */
#define C0 WC_UHC_MAP_C0
#define GL WC_UHC_MAP_GL
#define C1 WC_UHC_MAP_C1
#define LB WC_UHC_MAP_LB
#define UB WC_UHC_MAP_UB

/*
 * Byte classification table, indexed by byte value (0x00 - 0xFF).
 *
 *   C0 : 0x00 - 0x1F and 0x7F
 *   GL : 0x20 - 0x40, 0x5B - 0x60, 0x7B - 0x7E
 *   LB : 0x41 - 0x5A, 0x61 - 0x7A
 *   C1 : 0x80 and 0xFF
 *   UB : 0x81 - 0xFE
 *
 * The converters in this file treat a UB byte as the first byte of a
 * two-byte character and accept the following byte when its class has
 * the LB bit set (tested with "& LB").
 * NOTE(review): whether UB bytes also pass that "& LB" test depends on
 * the numeric values of WC_UHC_MAP_UB / WC_UHC_MAP_LB, which are not
 * visible here - confirm in uhc.h.
 */
wc_uint8 WC_UHC_MAP[ 0x100 ] = {
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,	/* 0x00 */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,	/* 0x10 */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,	/* 0x20 */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,	/* 0x30 */
    GL, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,	/* 0x40 */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, GL, GL, GL, GL, GL,	/* 0x50 */
    GL, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,	/* 0x60 */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, GL, GL, GL, GL, C0,	/* 0x70 */

    C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0x80 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0x90 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0xA0 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0xB0 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0xC0 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0xD0 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,	/* 0xE0 */
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,	/* 0xF0 */
};
     34 
     35 wc_wchar_t
     36 wc_uhc_to_cs128w(wc_wchar_t cc)
     37 {
     38     cc.code = WC_UHC_N(cc.code);
     39     if (cc.code < 0x4000)
     40 	cc.ccs = WC_CCS_UHC_1;
     41     else {
     42 	cc.ccs = WC_CCS_UHC_2;
     43 	cc.code -= 0x4000;
     44     }
     45     cc.code = WC_N_CS128W(cc.code);
     46     return cc;
     47 }
     48 
     49 wc_wchar_t
     50 wc_cs128w_to_uhc(wc_wchar_t cc)
     51 {
     52     cc.code = WC_CS128W_N(cc.code);
     53     if (cc.ccs == WC_CCS_UHC_2)
     54 	cc.code += 0x4000;
     55     cc.ccs = WC_CCS_UHC;
     56     cc.code = WC_N_UHC(cc.code);
     57     return cc;
     58 }
     59 
     60 wc_uint32
     61 wc_uhc_to_N(wc_uint32 c)
     62 {
     63     if (c <= 0xA1A0)	/* 0x8141 - 0xA1A0 */
     64 	return WC_UHC_N(c);
     65     if (c <= 0xA2A0)	/* 0xA240 - 0xA2A0 */
     66 	return WC_UHC_N(c) - 0x5E;
     67     if (c <= 0xA2E7)	/* 0xA2E6 - 0xA2E7 */
     68 	return WC_UHC_N(0xA2A0) - 0x5E + c - 0xA2E5;
     69 			/* 0xA340 - 0xFEA0 */
     70     return WC_UHC_N(c) - ((c >> 8) - 0xA1) * 0x5E + 2;
     71 }
     72 
     73 Str
     74 wc_conv_from_uhc(Str is, wc_ces ces)
     75 {
     76     Str os;
     77     wc_uchar *sp = (wc_uchar *)is->ptr;
     78     wc_uchar *ep = sp + is->length;
     79     wc_uchar *p;
     80     int state = WC_UHC_NOSTATE;
     81     wc_uint32 uhc;
     82 
     83     for (p = sp; p < ep && *p < 0x80; p++) 
     84 	;
     85     if (p == ep)
     86 	return is;
     87     os = Strnew_size(is->length);
     88     if (p > sp)
     89 	Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp));
     90 
     91     for (; p < ep; p++) {
     92 	switch (state) {
     93 	case WC_UHC_NOSTATE:
     94 	    switch (WC_UHC_MAP[*p]) {
     95 	    case UB:
     96 		state = WC_UHC_MBYTE1;
     97 		break;
     98 	    case C1:
     99 		wtf_push_unknown(os, p, 1);
    100 		break;
    101 	    default:
    102 		Strcat_char(os, (char)*p);
    103 		break;
    104 	    }
    105 	    break;
    106 	case WC_UHC_MBYTE1:
    107 	    if (WC_UHC_MAP[*p] & LB) {
    108 		uhc = ((wc_uint32)*(p-1) << 8) | *p;
    109 		if (*(p-1) >= 0xA1 && *p >= 0xA1 &&
    110 		    uhc != 0xA2E6 && uhc != 0xA2E7)
    111 		    wtf_push(os, WC_CCS_KS_X_1001, uhc);
    112 		else
    113 		    wtf_push(os, WC_CCS_UHC, uhc);
    114 	    } else
    115 		wtf_push_unknown(os, p-1, 2);
    116 	    state = WC_UHC_NOSTATE;
    117 	    break;
    118 	}
    119     }
    120     switch (state) {
    121     case WC_UHC_MBYTE1:
    122 	wtf_push_unknown(os, p-1, 1);
    123 	break;
    124     }
    125     return os;
    126 }
    127 
    128 void
    129 wc_push_to_uhc(Str os, wc_wchar_t cc, wc_status *st)
    130 {
    131   while (1) {
    132     switch (cc.ccs) {
    133     case WC_CCS_US_ASCII:
    134 	Strcat_char(os, (char)cc.code);
    135 	return;
    136     case WC_CCS_KS_X_1001:
    137 	Strcat_char(os, (char)((cc.code >> 8) | 0x80));
    138 	Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
    139 	return;
    140     case WC_CCS_UHC_1:
    141     case WC_CCS_UHC_2:
    142 	cc = wc_cs128w_to_uhc(cc);
    143     case WC_CCS_UHC:
    144 	Strcat_char(os, (char)(cc.code >> 8));
    145 	Strcat_char(os, (char)(cc.code & 0xff));
    146 	return;
    147     case WC_CCS_UNKNOWN_W:
    148 	if (!WcOption.no_replace)
    149 	    Strcat_charp(os, WC_REPLACE_W);
    150 	return;
    151     case WC_CCS_UNKNOWN:
    152 	if (!WcOption.no_replace)
    153 	    Strcat_charp(os, WC_REPLACE);
    154 	return;
    155     default:
    156 #ifdef USE_UNICODE
    157 	if (WcOption.ucs_conv)
    158 	    cc = wc_any_to_any_ces(cc, st);
    159 	else
    160 #endif
    161 	    cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    162 	continue;
    163     }
    164   }
    165 }
    166 
    167 Str
    168 wc_char_conv_from_uhc(wc_uchar c, wc_status *st)
    169 {
    170     static Str os;
    171     static wc_uchar uhcu;
    172     wc_uint32 uhc;
    173 
    174     if (st->state == -1) {
    175 	st->state = WC_UHC_NOSTATE;
    176 	os = Strnew_size(8);
    177     }
    178 
    179     switch (st->state) {
    180     case WC_UHC_NOSTATE:
    181 	switch (WC_UHC_MAP[c]) {
    182 	case UB:
    183 	    uhcu = c;
    184 	    st->state = WC_UHC_MBYTE1;
    185 	    return NULL;
    186 	case C1:
    187 	    break;
    188 	default:
    189 	    Strcat_char(os, (char)c);
    190 	    break;
    191 	}
    192 	break;
    193     case WC_UHC_MBYTE1:
    194 	if (WC_UHC_MAP[c] & LB) {
    195 	    uhc = ((wc_uint32)uhcu << 8) | c;
    196 	    if (uhcu >= 0xA1 && c >= 0xA1 &&
    197 		uhc != 0xA2E6 && uhc != 0xA2E7)
    198 		wtf_push(os, WC_CCS_KS_X_1001, uhc);
    199 	    else
    200 		wtf_push(os, WC_CCS_UHC, uhc);
    201 	}
    202 	break;
    203     }
    204     st->state = -1;
    205     return os;
    206 }