w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

mimehead.c (6323B)


      1 /* $Id$ */
      2 /* 
      3  * MIME header support by Akinori ITO
      4  */
      5 
      6 #include <sys/types.h>
      7 #include "fm.h"
      8 #include "myctype.h"
      9 #include "Str.h"
     10 
/*
 * NOTE(review): presumably the maximum length of one encoded header
 * line; it is not referenced in the code visible here -- confirm.
 */
#define MIME_ENCODED_LINE_LIMIT	80
/*
 * NOTE(review): 18 equals the length of the fixed framing
 * "=?" J_CHARSET "?B?" ... "?=" (2 + 11 + 3 + 2) -- confirm intent.
 */
#define MIME_ENCODED_WORD_LENGTH_OFFSET 18
/*
 * Approximate length of an encoded word carrying x raw bytes:
 * a 4/3 expansion (rounded up a little) plus the fixed offset above.
 */
#define MIME_ENCODED_WORD_LENGTH_ESTIMATION(x) \
	(((x)+2)*4/3+MIME_ENCODED_WORD_LENGTH_OFFSET)
/*
 * Rough inverse of the estimation above: number of raw bytes that fit
 * in an encoded word of x characters.
 */
#define MIME_DECODED_WORD_LENGTH_ESTIMATION(x) \
	(((x)-MIME_ENCODED_WORD_LENGTH_OFFSET)/4*3)
/* Charset name; NOTE(review): not referenced in the code visible here. */
#define J_CHARSET "ISO-2022-JP"
     18 
     19 #define BAD_BASE64 255
     20 
     21 static
     22     unsigned char
     23 c2e(char x)
     24 {
     25     if ('A' <= x && x <= 'Z')
     26 	return (x) - 'A';
     27     if ('a' <= x && x <= 'z')
     28 	return (x) - 'a' + 26;
     29     if ('0' <= x && x <= '9')
     30 	return (x) - '0' + 52;
     31     if (x == '+')
     32 	return 62;
     33     if (x == '/')
     34 	return 63;
     35     return BAD_BASE64;
     36 }
     37 
     38 static
     39     int
     40 ha2d(char x, char y)
     41 {
     42     int r = 0;
     43 
     44     if ('0' <= x && x <= '9')
     45 	r = x - '0';
     46     else if ('A' <= x && x <= 'F')
     47 	r = x - 'A' + 10;
     48     else if ('a' <= x && x <= 'f')
     49 	r = x - 'a' + 10;
     50 
     51     r <<= 4;
     52 
     53     if ('0' <= y && y <= '9')
     54 	r += y - '0';
     55     else if ('A' <= y && y <= 'F')
     56 	r += y - 'A' + 10;
     57     else if ('a' <= y && y <= 'f')
     58 	r += y - 'a' + 10;
     59 
     60     return r;
     61 
     62 }
     63 
     64 Str
     65 decodeB(char **ww)
     66 {
     67     unsigned char c[4];
     68     char *wp = *ww;
     69     char d[3];
     70     int i, n_pad;
     71     Str ap = Strnew_size(strlen(wp));
     72 
     73     n_pad = 0;
     74     while (1) {
     75 	for (i = 0; i < 4; i++) {
     76 	    c[i] = *(wp++);
     77 	    if (*wp == '\0' || *wp == '?') {
     78 		i++;
     79 		for (; i < 4; i++) {
     80 		    c[i] = '=';
     81 		}
     82 		break;
     83 	    }
     84 	}
     85 	if (c[3] == '=') {
     86 	    n_pad++;
     87 	    c[3] = 'A';
     88 	    if (c[2] == '=') {
     89 		n_pad++;
     90 		c[2] = 'A';
     91 	    }
     92 	}
     93 	for (i = 0; i < 4; i++) {
     94 	    c[i] = c2e(c[i]);
     95 	    if (c[i] == BAD_BASE64) {
     96 		*ww = wp;
     97 		return ap;
     98 	    }
     99 	}
    100 	d[0] = ((c[0] << 2) | (c[1] >> 4));
    101 	d[1] = ((c[1] << 4) | (c[2] >> 2));
    102 	d[2] = ((c[2] << 6) | c[3]);
    103 	for (i = 0; i < 3 - n_pad; i++) {
    104 	    Strcat_char(ap, d[i]);
    105 	}
    106 	if (n_pad || *wp == '\0' || *wp == '?')
    107 	    break;
    108     }
    109     *ww = wp;
    110     return ap;
    111 }
    112 
    113 Str
    114 decodeU(char **ww)
    115 {
    116     unsigned char c1, c2;
    117     char *w = *ww;
    118     int n, i;
    119     Str a;
    120 
    121     if (*w <= 0x20 || *w >= 0x60)
    122 	return Strnew_size(0);
    123     n = *w - 0x20;
    124     a = Strnew_size(n);
    125     for (w++, i = 2; *w != '\0' && n; n--) {
    126 	c1 = (w[0] - 0x20) % 0x40;
    127 	c2 = (w[1] - 0x20) % 0x40;
    128 	Strcat_char(a, (c1 << i) | (c2 >> (6 - i)));
    129 	if (i == 6) {
    130 	    w += 2;
    131 	    i = 2;
    132 	}
    133 	else {
    134 	    w++;
    135 	    i += 2;
    136 	}
    137     }
    138     return a;
    139 }
    140 
    141 /* RFC2047 (4.2. The "Q" encoding) */
    142 Str
    143 decodeQ(char **ww)
    144 {
    145     char *w = *ww;
    146     Str a = Strnew_size(strlen(w));
    147 
    148     for (; *w != '\0' && *w != '?'; w++) {
    149 	if (*w == '=') {
    150 	    w++;
    151 	    Strcat_char(a, ha2d(*w, *(w + 1)));
    152 	    w++;
    153 	}
    154 	else if (*w == '_') {
    155 	    Strcat_char(a, ' ');
    156 	}
    157 	else
    158 	    Strcat_char(a, *w);
    159     }
    160     *ww = w;
    161     return a;
    162 }
    163 
    164 /* RFC2045 (6.7. Quoted-Printable Content-Transfer-Encoding) */
    165 Str
    166 decodeQP(char **ww)
    167 {
    168     char *w = *ww;
    169     Str a = Strnew_size(strlen(w));
    170 
    171     for (; *w != '\0'; w++) {
    172 	if (*w == '=') {
    173 	    w++;
    174 	    if (*w == '\n' || *w == '\r' || *w == ' ' || *w == '\t') {
    175 		while (*w != '\n' && *w != '\0')
    176 		    w++;
    177 		if (*w == '\0')
    178 		    break;
    179 	    }
    180 	    else {
    181 		if (*w == '\0' || *(w + 1) == '\0')
    182 		    break;
    183 		Strcat_char(a, ha2d(*w, *(w + 1)));
    184 		w++;
    185 	    }
    186 	}
    187 	else
    188 	    Strcat_char(a, *w);
    189     }
    190     *ww = w;
    191     return a;
    192 }
    193 
    194 #ifdef USE_M17N
    195 Str
    196 decodeWord(char **ow, wc_ces * charset)
    197 #else
    198 Str
    199 decodeWord0(char **ow)
    200 #endif
    201 {
    202 #ifdef USE_M17N
    203     wc_ces c;
    204 #endif
    205     char *p, *w = *ow;
    206     char method;
    207     Str a = Strnew();
    208     Str tmp = Strnew();
    209 
    210     if (*w != '=' || *(w + 1) != '?')
    211 	goto convert_fail;
    212     w += 2;
    213     for (; *w != '?'; w++) {
    214 	if (*w == '\0')
    215 	    goto convert_fail;
    216 	Strcat_char(tmp, *w);
    217     }
    218 #ifdef USE_M17N
    219     c = wc_guess_charset(tmp->ptr, 0);
    220     if (!c)
    221 	goto convert_fail;
    222 #else
    223     if (strcasecmp(tmp->ptr, "ISO-8859-1") != 0 && strcasecmp(tmp->ptr, "US_ASCII") != 0)
    224 	/* NOT ISO-8859-1 encoding ... don't convert */
    225 	goto convert_fail;
    226 #endif
    227     w++;
    228     method = *(w++);
    229     if (*w != '?')
    230 	goto convert_fail;
    231     w++;
    232     p = w;
    233     switch (TOUPPER(method)) {
    234     case 'B':
    235 	a = decodeB(&w);
    236 	break;
    237     case 'Q':
    238 	a = decodeQ(&w);
    239 	break;
    240     default:
    241 	goto convert_fail;
    242     }
    243     if (p == w)
    244 	goto convert_fail;
    245     if (*w == '?') {
    246 	w++;
    247 	if (*w == '=')
    248 	    w++;
    249     }
    250     *ow = w;
    251 #ifdef USE_M17N
    252     *charset = c;
    253 #endif
    254     return a;
    255 
    256   convert_fail:
    257     return Strnew();
    258 }
    259 
    260 /* 
    261  * convert MIME encoded string to the original one
    262  */
    263 #ifdef USE_M17N
    264 Str
    265 decodeMIME(Str orgstr, wc_ces * charset)
    266 #else
    267 Str
    268 decodeMIME0(Str orgstr)
    269 #endif
    270 {
    271     char *org = orgstr->ptr, *endp = org + orgstr->length;
    272     char *org0, *p;
    273     Str cnv = NULL;
    274 
    275 #ifdef USE_M17N
    276     *charset = 0;
    277 #endif
    278     while (org < endp) {
    279 	if (*org == '=' && *(org + 1) == '?') {
    280 	    if (cnv == NULL) {
    281 		cnv = Strnew_size(orgstr->length);
    282 		Strcat_charp_n(cnv, orgstr->ptr, org - orgstr->ptr);
    283 	    }
    284 	  nextEncodeWord:
    285 	    p = org;
    286 	    Strcat(cnv, decodeWord(&org, charset));
    287 	    if (org == p) {	/* Convert failure */
    288 		Strcat_charp(cnv, org);
    289 		return cnv;
    290 	    }
    291 	    org0 = org;
    292 	  SPCRLoop:
    293 	    switch (*org0) {
    294 	    case ' ':
    295 	    case '\t':
    296 	    case '\n':
    297 	    case '\r':
    298 		org0++;
    299 		goto SPCRLoop;
    300 	    case '=':
    301 		if (org0[1] == '?') {
    302 		    org = org0;
    303 		    goto nextEncodeWord;
    304 		}
    305 	    default:
    306 		break;
    307 	    }
    308 	}
    309 	else {
    310 	    if (cnv != NULL)
    311 		Strcat_char(cnv, *org);
    312 	    org++;
    313 	}
    314     }
    315     if (cnv == NULL)
    316 	return orgstr;
    317     return cnv;
    318 }
    319 
    320 /* encoding */
    321 
/*
 * Base64 alphabet indexed by 6-bit value (0..63); the extra entry at
 * index 64 is the '=' padding character used by encodeB().
 */
static char Base64Table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    324 
    325 Str
    326 encodeB(char *a)
    327 {
    328     unsigned char d[3];
    329     unsigned char c1, c2, c3, c4;
    330     int i, n_pad;
    331     Str w = Strnew();
    332 
    333     while (1) {
    334 	if (*a == '\0')
    335 	    break;
    336 	n_pad = 0;
    337 	d[1] = d[2] = 0;
    338 	for (i = 0; i < 3; i++) {
    339 	    d[i] = a[i];
    340 	    if (a[i] == '\0') {
    341 		n_pad = 3 - i;
    342 		break;
    343 	    }
    344 	}
    345 	c1 = d[0] >> 2;
    346 	c2 = (((d[0] << 4) | (d[1] >> 4)) & 0x3f);
    347 	if (n_pad == 2) {
    348 	    c3 = c4 = 64;
    349 	}
    350 	else if (n_pad == 1) {
    351 	    c3 = ((d[1] << 2) & 0x3f);
    352 	    c4 = 64;
    353 	}
    354 	else {
    355 	    c3 = (((d[1] << 2) | (d[2] >> 6)) & 0x3f);
    356 	    c4 = (d[2] & 0x3f);
    357 	}
    358 	Strcat_char(w, Base64Table[c1]);
    359 	Strcat_char(w, Base64Table[c2]);
    360 	Strcat_char(w, Base64Table[c3]);
    361 	Strcat_char(w, Base64Table[c4]);
    362 	if (n_pad)
    363 	    break;
    364 	a += 3;
    365     }
    366     return w;
    367 }