w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

url.c (53708B)


      1 /* $Id$ */
      2 #include "fm.h"
      3 #ifndef __MINGW32_VERSION
      4 #include <sys/types.h>
      5 #include <sys/socket.h>
      6 #include <netinet/in.h>
      7 #include <arpa/inet.h>
      8 #include <netdb.h>
      9 #else
     10 #include <winsock.h>
     11 #endif /* __MINGW32_VERSION */
     12 
     13 #include <signal.h>
     14 #include <setjmp.h>
     15 #include <errno.h>
     16 
     17 #include <sys/stat.h>
     18 #ifdef __EMX__
     19 #include <io.h>			/* ?? */
     20 #endif				/* __EMX__ */
     21 
     22 #include "html.h"
     23 #include "Str.h"
     24 #include "myctype.h"
     25 #include "regex.h"
     26 
     27 #ifdef USE_SSL
     28 #ifndef SSLEAY_VERSION_NUMBER
     29 #include <openssl/crypto.h>		/* SSLEAY_VERSION_NUMBER may be here */
     30 #endif
     31 #include <openssl/err.h>
     32 #endif
     33 
/* When built with __WATT32__, every write() in this file is replaced by write_s(). */
#ifdef	__WATT32__
#define	write(a,b,c)	write_s(a,b,c)
#endif				/* __WATT32__ */

/*
 * When built with __MINGW32_VERSION, write() becomes send() with flags 0 and
 * close() becomes closesocket(), i.e. both are treated as socket operations.
 * NOTE(review): the macros apply to every write()/close() call that follows
 * in this file, not only to calls made on sockets.
 */
#ifdef __MINGW32_VERSION
#define	write(a,b,c)	send(a,b,c, 0)
#define close(fd)	closesocket(fd)
#endif
     42 
#ifdef INET6
/* see rc.c, "dns_order" and dnsorders[] */
/*
 * Address-family search order for name resolution.  openSocket() selects a
 * row with DNS_order and tries the families of that row from left to right;
 * PF_UNSPEC is the last family tried, after which the lookup gives up.
 * Rows 3 and 5 are placeholders that behave like row 0.
 */
int ai_family_order_table[7][3] = {
    {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC},	/* 0:unspec */
    {PF_INET, PF_INET6, PF_UNSPEC},	/* 1:inet inet6 */
    {PF_INET6, PF_INET, PF_UNSPEC},	/* 2:inet6 inet */
    {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC},  /* 3: --- */
    {PF_INET, PF_UNSPEC, PF_UNSPEC},    /* 4:inet */
    {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC},  /* 5: --- */
    {PF_INET6, PF_UNSPEC, PF_UNSPEC},   /* 6:inet6 */
};
#endif				/* INET6 */
     55 
/*
 * Jump buffer shared by KeyAbort() (which LONGJMPs to it) and the code that
 * arms it with SETJMP before an interruptible operation, e.g. openSocket().
 */
static JMP_BUF AbortLoading;
     57 
/* XXX: note html.h SCM_ */
/*
 * Default TCP port per scheme.  The table is indexed directly with the
 * scheme value stored in ParsedURL (see parseURL()), so the entry order has
 * to follow the SCM_ constants of html.h.  0 means "no default port".
 */
static int
 DefaultPort[] = {
    80,				/* http */
    70,				/* gopher */
    21,				/* ftp */
    21,				/* ftpdir */
    0,				/* local - not defined */
    0,				/* local-CGI - not defined? */
    0,				/* exec - not defined? */
    119,			/* nntp */
    119,			/* nntp group */
    119,			/* news */
    119,			/* news group */
    0,				/* data - not defined */
    0,				/* mailto - not defined */
#ifdef USE_SSL
    443,			/* https */
#endif				/* USE_SSL */
};
     78 
/*
 * Scheme name -> SCM_ constant.  "local" and "file" both map to SCM_LOCAL.
 * "mailto" is only listed when USE_W3MMAILER is not defined and "https" only
 * when USE_SSL is defined.  The {NULL, SCM_UNKNOWN} entry ends the table.
 */
struct cmdtable schemetable[] = {
    {"http", SCM_HTTP},
    {"gopher", SCM_GOPHER},
    {"ftp", SCM_FTP},
    {"local", SCM_LOCAL},
    {"file", SCM_LOCAL},
    /*  {"exec", SCM_EXEC}, */
    {"nntp", SCM_NNTP},
    /*  {"nntp", SCM_NNTP_GROUP}, */
    {"news", SCM_NEWS},
    /*  {"news", SCM_NEWS_GROUP}, */
    {"data", SCM_DATA},
#ifndef USE_W3MMAILER
    {"mailto", SCM_MAILTO},
#endif
#ifdef USE_SSL
    {"https", SCM_HTTPS},
#endif				/* USE_SSL */
    {NULL, SCM_UNKNOWN},
};
     99 
/*
 * Built-in pairs of file name extension (first member) and MIME type
 * (second member), ended by a {NULL, NULL} entry.
 * NOTE(review): the code that consults this table is not in this part of
 * the file; presumably it is the fallback after the user's mime.types.
 */
static struct table2 DefaultGuess[] = {
    {"html", "text/html"},
    {"htm", "text/html"},
    {"shtml", "text/html"},
    {"xhtml", "application/xhtml+xml"},
    {"gif", "image/gif"},
    {"jpeg", "image/jpeg"},
    {"jpg", "image/jpeg"},
    {"png", "image/png"},
    {"xbm", "image/xbm"},
    {"au", "audio/basic"},
    {"gz", "application/x-gzip"},
    {"Z", "application/x-compress"},
    {"bz2", "application/x-bzip"},
    {"tar", "application/x-tar"},
    {"zip", "application/x-zip"},
    {"lha", "application/x-lha"},
    {"lzh", "application/x-lha"},
    {"ps", "application/postscript"},
    {"pdf", "application/pdf"},
    {NULL, NULL}
};
    122 
/* Forward declaration; being static, it is defined elsewhere in this file. */
static void add_index_file(ParsedURL *pu, URLFile *uf);

/* #define HTTP_DEFAULT_FILE    "/index.html" */

/*
 * Path that DefaultFile() returns for http/https URLs.  It can be overridden
 * from the build; otherwise "/" is used.
 */
#ifndef HTTP_DEFAULT_FILE
#define HTTP_DEFAULT_FILE "/"
#endif				/* not HTTP_DEFAULT_FILE */
    130 
    131 #ifdef SOCK_DEBUG
    132 #include <stdarg.h>
    133 
    134 static void
    135 sock_log(char *message, ...)
    136 {
    137     FILE *f = fopen("zzzsocklog", "a");
    138     va_list va;
    139 
    140     if (f == NULL)
    141 	return;
    142     va_start(va, message);
    143     vfprintf(f, message, va);
    144     fclose(f);
    145 }
    146 
    147 #endif
    148 
/* List of mime.types file names, built by initMimeTypes() from mimetypes_files. */
static TextList *mimetypes_list;
/*
 * One table per entry of mimetypes_list, in the same order, each loaded by
 * loadMimeTypes().  An element is NULL when its file could not be opened.
 */
static struct table2 **UserMimeTypes;
    151 
    152 static struct table2 *
    153 loadMimeTypes(char *filename)
    154 {
    155     FILE *f;
    156     char *d, *type;
    157     int i, n;
    158     Str tmp;
    159     struct table2 *mtypes;
    160 
    161     f = fopen(expandPath(filename), "r");
    162     if (f == NULL)
    163 	return NULL;
    164     n = 0;
    165     while (tmp = Strfgets(f), tmp->length > 0) {
    166 	d = tmp->ptr;
    167 	if (d[0] != '#') {
    168 	    d = strtok(d, " \t\n\r");
    169 	    if (d != NULL) {
    170 		d = strtok(NULL, " \t\n\r");
    171 		for (i = 0; d != NULL; i++)
    172 		    d = strtok(NULL, " \t\n\r");
    173 		n += i;
    174 	    }
    175 	}
    176     }
    177     fseek(f, 0, 0);
    178     mtypes = New_N(struct table2, n + 1);
    179     i = 0;
    180     while (tmp = Strfgets(f), tmp->length > 0) {
    181 	d = tmp->ptr;
    182 	if (d[0] == '#')
    183 	    continue;
    184 	type = strtok(d, " \t\n\r");
    185 	if (type == NULL)
    186 	    continue;
    187 	while (1) {
    188 	    d = strtok(NULL, " \t\n\r");
    189 	    if (d == NULL)
    190 		break;
    191 	    mtypes[i].item1 = Strnew_charp(d)->ptr;
    192 	    mtypes[i].item2 = Strnew_charp(type)->ptr;
    193 	    i++;
    194 	}
    195     }
    196     mtypes[i].item1 = NULL;
    197     mtypes[i].item2 = NULL;
    198     fclose(f);
    199     return mtypes;
    200 }
    201 
    202 void
    203 initMimeTypes()
    204 {
    205     int i;
    206     TextListItem *tl;
    207 
    208     if (non_null(mimetypes_files))
    209 	mimetypes_list = make_domain_list(mimetypes_files);
    210     else
    211 	mimetypes_list = NULL;
    212     if (mimetypes_list == NULL)
    213 	return;
    214     UserMimeTypes = New_N(struct table2 *, mimetypes_list->nitem);
    215     for (i = 0, tl = mimetypes_list->first; tl; i++, tl = tl->next)
    216 	UserMimeTypes[i] = loadMimeTypes(tl->ptr);
    217 }
    218 
    219 static char *
    220 DefaultFile(int scheme)
    221 {
    222     switch (scheme) {
    223     case SCM_HTTP:
    224 #ifdef USE_SSL
    225     case SCM_HTTPS:
    226 #endif				/* USE_SSL */
    227 	return allocStr(HTTP_DEFAULT_FILE, -1);
    228 #ifdef USE_GOPHER
    229     case SCM_GOPHER:
    230 	return allocStr("1", -1);
    231 #endif				/* USE_GOPHER */
    232     case SCM_LOCAL:
    233     case SCM_LOCAL_CGI:
    234     case SCM_FTP:
    235     case SCM_FTPDIR:
    236 	return allocStr("/", -1);
    237     }
    238     return NULL;
    239 }
    240 
/*
 * KeyAbort - signal handler that abandons the operation in progress by
 * jumping back to the SETJMP(AbortLoading) point with value 1.
 */
static MySignalHandler
KeyAbort(SIGNAL_ARG)
{
    LONGJMP(AbortLoading, 1);
    SIGNAL_RETURN;
}
    247 
    248 #ifdef USE_SSL
    249 SSL_CTX *ssl_ctx = NULL;
    250 
    251 void
    252 free_ssl_ctx()
    253 {
    254     if (ssl_ctx != NULL)
    255 	SSL_CTX_free(ssl_ctx);
    256     ssl_ctx = NULL;
    257     ssl_accept_this_site(NULL);
    258 }
    259 
    260 #if SSLEAY_VERSION_NUMBER >= 0x00905100
    261 #include <openssl/rand.h>
    262 static void
    263 init_PRNG()
    264 {
    265     char buffer[256];
    266     const char *file;
    267     long l;
    268     if (RAND_status())
    269 	return;
    270     if ((file = RAND_file_name(buffer, sizeof(buffer)))) {
    271 #ifdef USE_EGD
    272 	if (RAND_egd(file) > 0)
    273 	    return;
    274 #endif
    275 	RAND_load_file(file, -1);
    276     }
    277     if (RAND_status())
    278 	goto seeded;
    279     srand48((long)time(NULL));
    280     while (!RAND_status()) {
    281 	l = lrand48();
    282 	RAND_seed((unsigned char *)&l, sizeof(long));
    283     }
    284   seeded:
    285     if (file)
    286 	RAND_write_file(file);
    287 }
    288 #endif				/* SSLEAY_VERSION_NUMBER >= 0x00905100 */
    289 
    290 static SSL *
    291 openSSLHandle(int sock, char *hostname, char **p_cert)
    292 {
    293     SSL *handle = NULL;
    294     static char *old_ssl_forbid_method = NULL;
    295 #ifdef USE_SSL_VERIFY
    296     static int old_ssl_verify_server = -1;
    297 #endif
    298 
    299     if (old_ssl_forbid_method != ssl_forbid_method
    300 	&& (!old_ssl_forbid_method || !ssl_forbid_method ||
    301 	    strcmp(old_ssl_forbid_method, ssl_forbid_method))) {
    302 	old_ssl_forbid_method = ssl_forbid_method;
    303 #ifdef USE_SSL_VERIFY
    304 	ssl_path_modified = 1;
    305 #else
    306 	free_ssl_ctx();
    307 #endif
    308     }
    309 #ifdef USE_SSL_VERIFY
    310     if (old_ssl_verify_server != ssl_verify_server) {
    311 	old_ssl_verify_server = ssl_verify_server;
    312 	ssl_path_modified = 1;
    313     }
    314     if (ssl_path_modified) {
    315 	free_ssl_ctx();
    316 	ssl_path_modified = 0;
    317     }
    318 #endif				/* defined(USE_SSL_VERIFY) */
    319     if (ssl_ctx == NULL) {
    320 	int option;
    321 #if SSLEAY_VERSION_NUMBER < 0x0800
    322 	ssl_ctx = SSL_CTX_new();
    323 	X509_set_default_verify_paths(ssl_ctx->cert);
    324 #else				/* SSLEAY_VERSION_NUMBER >= 0x0800 */
    325 	SSLeay_add_ssl_algorithms();
    326 	SSL_load_error_strings();
    327 	if (!(ssl_ctx = SSL_CTX_new(SSLv23_client_method())))
    328 	    goto eend;
    329 	option = SSL_OP_ALL;
    330 	if (ssl_forbid_method) {
    331 	    if (strchr(ssl_forbid_method, '2'))
    332 		option |= SSL_OP_NO_SSLv2;
    333 	    if (strchr(ssl_forbid_method, '3'))
    334 		option |= SSL_OP_NO_SSLv3;
    335 	    if (strchr(ssl_forbid_method, 't'))
    336 		option |= SSL_OP_NO_TLSv1;
    337 	    if (strchr(ssl_forbid_method, 'T'))
    338 		option |= SSL_OP_NO_TLSv1;
    339 	}
    340 	SSL_CTX_set_options(ssl_ctx, option);
    341 #ifdef USE_SSL_VERIFY
    342 	/* derived from openssl-0.9.5/apps/s_{client,cb}.c */
    343 #if 1				/* use SSL_get_verify_result() to verify cert */
    344 	SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_NONE, NULL);
    345 #else
    346 	SSL_CTX_set_verify(ssl_ctx,
    347 			   ssl_verify_server ? SSL_VERIFY_PEER :
    348 			   SSL_VERIFY_NONE, NULL);
    349 #endif
    350 	if (ssl_cert_file != NULL && *ssl_cert_file != '\0') {
    351 	    int ng = 1;
    352 	    if (SSL_CTX_use_certificate_file
    353 		(ssl_ctx, ssl_cert_file, SSL_FILETYPE_PEM) > 0) {
    354 		char *key_file = (ssl_key_file == NULL
    355 				  || *ssl_key_file ==
    356 				  '\0') ? ssl_cert_file : ssl_key_file;
    357 		if (SSL_CTX_use_PrivateKey_file
    358 		    (ssl_ctx, key_file, SSL_FILETYPE_PEM) > 0)
    359 		    if (SSL_CTX_check_private_key(ssl_ctx))
    360 			ng = 0;
    361 	    }
    362 	    if (ng) {
    363 		free_ssl_ctx();
    364 		goto eend;
    365 	    }
    366 	}
    367 	if ((!ssl_ca_file && !ssl_ca_path)
    368 	    || SSL_CTX_load_verify_locations(ssl_ctx, ssl_ca_file, ssl_ca_path))
    369 #endif				/* defined(USE_SSL_VERIFY) */
    370 	    SSL_CTX_set_default_verify_paths(ssl_ctx);
    371 #endif				/* SSLEAY_VERSION_NUMBER >= 0x0800 */
    372     }
    373     handle = SSL_new(ssl_ctx);
    374     SSL_set_fd(handle, sock);
    375 #if SSLEAY_VERSION_NUMBER >= 0x00905100
    376     init_PRNG();
    377 #endif				/* SSLEAY_VERSION_NUMBER >= 0x00905100 */
    378 #if (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT)
    379     SSL_set_tlsext_host_name(handle,hostname);
    380 #endif				/* (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT) */
    381     if (SSL_connect(handle) > 0) {
    382 	Str serv_cert = ssl_get_certificate(handle, hostname);
    383 	if (serv_cert) {
    384 	    *p_cert = serv_cert->ptr;
    385 	    return handle;
    386 	}
    387 	close(sock);
    388 	SSL_free(handle);
    389 	return NULL;
    390     }
    391   eend:
    392     close(sock);
    393     if (handle)
    394 	SSL_free(handle);
    395     /* FIXME: gettextize? */
    396     disp_err_message(Sprintf
    397 		     ("SSL error: %s",
    398 		      ERR_error_string(ERR_get_error(), NULL))->ptr, FALSE);
    399     return NULL;
    400 }
    401 
    402 static void
    403 SSL_write_from_file(SSL * ssl, char *file)
    404 {
    405     FILE *fd;
    406     int c;
    407     char buf[1];
    408     fd = fopen(file, "r");
    409     if (fd != NULL) {
    410 	while ((c = fgetc(fd)) != EOF) {
    411 	    buf[0] = c;
    412 	    SSL_write(ssl, buf, 1);
    413 	}
    414 	fclose(fd);
    415     }
    416 }
    417 
    418 #endif				/* USE_SSL */
    419 
    420 static void
    421 write_from_file(int sock, char *file)
    422 {
    423     FILE *fd;
    424     int c;
    425     char buf[1];
    426     fd = fopen(file, "r");
    427     if (fd != NULL) {
    428 	while ((c = fgetc(fd)) != EOF) {
    429 	    buf[0] = c;
    430 	    write(sock, buf, 1);
    431 	}
    432 	fclose(fd);
    433     }
    434 }
    435 
    436 ParsedURL *
    437 baseURL(Buffer *buf)
    438 {
    439     if (buf->bufferprop & BP_NO_URL) {
    440 	/* no URL is defined for the buffer */
    441 	return NULL;
    442     }
    443     if (buf->baseURL != NULL) {
    444 	/* <BASE> tag is defined in the document */
    445 	return buf->baseURL;
    446     }
    447     else
    448 	return &buf->currentURL;
    449 }
    450 
    451 int
    452 openSocket(char *const hostname,
    453 	   char *remoteport_name, unsigned short remoteport_num)
    454 {
    455     volatile int sock = -1;
    456 #ifdef INET6
    457     int *af;
    458     struct addrinfo hints, *res0, *res;
    459     int error;
    460     char *hname;
    461 #else				/* not INET6 */
    462     struct sockaddr_in hostaddr;
    463     struct hostent *entry;
    464     struct protoent *proto;
    465     unsigned short s_port;
    466     int a1, a2, a3, a4;
    467     unsigned long adr;
    468 #endif				/* not INET6 */
    469     MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL;
    470 
    471     if (fmInitialized) {
    472 	/* FIXME: gettextize? */
    473 	message(Sprintf("Opening socket...")->ptr, 0, 0);
    474 	refresh();
    475     }
    476     if (SETJMP(AbortLoading) != 0) {
    477 #ifdef SOCK_DEBUG
    478 	sock_log("openSocket() failed. reason: user abort\n");
    479 #endif
    480 	if (sock >= 0)
    481 	    close(sock);
    482 	goto error;
    483     }
    484     TRAP_ON;
    485     if (hostname == NULL) {
    486 #ifdef SOCK_DEBUG
    487 	sock_log("openSocket() failed. reason: Bad hostname \"%s\"\n",
    488 		 hostname);
    489 #endif
    490 	goto error;
    491     }
    492 
    493 #ifdef INET6
    494     /* rfc2732 compliance */
    495     hname = hostname;
    496     if (hname != NULL && hname[0] == '[' && hname[strlen(hname) - 1] == ']') {
    497 	hname = allocStr(hostname + 1, -1);
    498 	hname[strlen(hname) - 1] = '\0';
    499 	if (strspn(hname, "0123456789abcdefABCDEF:.") != strlen(hname))
    500 	    goto error;
    501     }
    502     for (af = ai_family_order_table[DNS_order];; af++) {
    503 	memset(&hints, 0, sizeof(hints));
    504 	hints.ai_family = *af;
    505 	hints.ai_socktype = SOCK_STREAM;
    506 	if (remoteport_num != 0) {
    507 	    Str portbuf = Sprintf("%d", remoteport_num);
    508 	    error = getaddrinfo(hname, portbuf->ptr, &hints, &res0);
    509 	}
    510 	else {
    511 	    error = -1;
    512 	}
    513 	if (error && remoteport_name && remoteport_name[0] != '\0') {
    514 	    /* try default port */
    515 	    error = getaddrinfo(hname, remoteport_name, &hints, &res0);
    516 	}
    517 	if (error) {
    518 	    if (*af == PF_UNSPEC) {
    519 		goto error;
    520 	    }
    521 	    /* try next ai family */
    522 	    continue;
    523 	}
    524 	sock = -1;
    525 	for (res = res0; res; res = res->ai_next) {
    526 	    sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
    527 	    if (sock < 0) {
    528 		continue;
    529 	    }
    530 	    if (connect(sock, res->ai_addr, res->ai_addrlen) < 0) {
    531 		close(sock);
    532 		sock = -1;
    533 		continue;
    534 	    }
    535 	    break;
    536 	}
    537 	if (sock < 0) {
    538 	    freeaddrinfo(res0);
    539 	    if (*af == PF_UNSPEC) {
    540 		goto error;
    541 	    }
    542 	    /* try next ai family */
    543 	    continue;
    544 	}
    545 	freeaddrinfo(res0);
    546 	break;
    547     }
    548 #else				/* not INET6 */
    549     s_port = htons(remoteport_num);
    550     bzero((char *)&hostaddr, sizeof(struct sockaddr_in));
    551     if ((proto = getprotobyname("tcp")) == NULL) {
    552 	/* protocol number of TCP is 6 */
    553 	proto = New(struct protoent);
    554 	proto->p_proto = 6;
    555     }
    556     if ((sock = socket(AF_INET, SOCK_STREAM, proto->p_proto)) < 0) {
    557 #ifdef SOCK_DEBUG
    558 	sock_log("openSocket: socket() failed. reason: %s\n", strerror(errno));
    559 #endif
    560 	goto error;
    561     }
    562     regexCompile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$", 0);
    563     if (regexMatch(hostname, -1, 1)) {
    564 	sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4);
    565 	adr = htonl((a1 << 24) | (a2 << 16) | (a3 << 8) | a4);
    566 	bcopy((void *)&adr, (void *)&hostaddr.sin_addr, sizeof(long));
    567 	hostaddr.sin_family = AF_INET;
    568 	hostaddr.sin_port = s_port;
    569 	if (fmInitialized) {
    570 	    message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0);
    571 	    refresh();
    572 	}
    573 	if (connect(sock, (struct sockaddr *)&hostaddr,
    574 		    sizeof(struct sockaddr_in)) < 0) {
    575 #ifdef SOCK_DEBUG
    576 	    sock_log("openSocket: connect() failed. reason: %s\n",
    577 		     strerror(errno));
    578 #endif
    579 	    goto error;
    580 	}
    581     }
    582     else {
    583 	char **h_addr_list;
    584 	int result = -1;
    585 	if (fmInitialized) {
    586 	    message(Sprintf("Performing hostname lookup on %s", hostname)->ptr,
    587 		    0, 0);
    588 	    refresh();
    589 	}
    590 	if ((entry = gethostbyname(hostname)) == NULL) {
    591 #ifdef SOCK_DEBUG
    592 	    sock_log("openSocket: gethostbyname() failed. reason: %s\n",
    593 		     strerror(errno));
    594 #endif
    595 	    goto error;
    596 	}
    597 	hostaddr.sin_family = AF_INET;
    598 	hostaddr.sin_port = s_port;
    599 	for (h_addr_list = entry->h_addr_list; *h_addr_list; h_addr_list++) {
    600 	    bcopy((void *)h_addr_list[0], (void *)&hostaddr.sin_addr,
    601 		  entry->h_length);
    602 #ifdef SOCK_DEBUG
    603 	    adr = ntohl(*(long *)&hostaddr.sin_addr);
    604 	    sock_log("openSocket: connecting %d.%d.%d.%d\n",
    605 		     (adr >> 24) & 0xff,
    606 		     (adr >> 16) & 0xff, (adr >> 8) & 0xff, adr & 0xff);
    607 #endif
    608 	    if (fmInitialized) {
    609 		message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0);
    610 		refresh();
    611 	    }
    612 	    if ((result = connect(sock, (struct sockaddr *)&hostaddr,
    613 				  sizeof(struct sockaddr_in))) == 0) {
    614 		break;
    615 	    }
    616 #ifdef SOCK_DEBUG
    617 	    else {
    618 		sock_log("openSocket: connect() failed. reason: %s\n",
    619 			 strerror(errno));
    620 	    }
    621 #endif
    622 	}
    623 	if (result < 0) {
    624 	    goto error;
    625 	}
    626     }
    627 #endif				/* not INET6 */
    628 
    629     TRAP_OFF;
    630     return sock;
    631   error:
    632     TRAP_OFF;
    633     return -1;
    634 
    635 }
    636 
    637 
    638 #define COPYPATH_SPC_ALLOW 0
    639 #define COPYPATH_SPC_IGNORE 1
    640 #define COPYPATH_SPC_REPLACE 2
    641 
    642 static char *
    643 copyPath(char *orgpath, int length, int option)
    644 {
    645     Str tmp = Strnew();
    646     while (*orgpath && length != 0) {
    647 	if (IS_SPACE(*orgpath)) {
    648 	    switch (option) {
    649 	    case COPYPATH_SPC_ALLOW:
    650 		Strcat_char(tmp, *orgpath);
    651 		break;
    652 	    case COPYPATH_SPC_IGNORE:
    653 		/* do nothing */
    654 		break;
    655 	    case COPYPATH_SPC_REPLACE:
    656 		Strcat_charp(tmp, "%20");
    657 		break;
    658 	    }
    659 	}
    660 	else
    661 	    Strcat_char(tmp, *orgpath);
    662 	orgpath++;
    663 	length--;
    664     }
    665     return tmp->ptr;
    666 }
    667 
    668 void
    669 parseURL(char *url, ParsedURL *p_url, ParsedURL *current)
    670 {
    671     char *p, *q;
    672     Str tmp;
    673 
    674     url = url_quote(url);	/* quote 0x01-0x20, 0x7F-0xFF */
    675 
    676     p = url;
    677     p_url->scheme = SCM_MISSING;
    678     p_url->port = 0;
    679     p_url->user = NULL;
    680     p_url->pass = NULL;
    681     p_url->host = NULL;
    682     p_url->is_nocache = 0;
    683     p_url->file = NULL;
    684     p_url->real_file = NULL;
    685     p_url->query = NULL;
    686     p_url->label = NULL;
    687 
    688     /* RFC1808: Relative Uniform Resource Locators
    689      * 4.  Resolving Relative URLs
    690      */
    691     if (*url == '\0' || *url == '#') {
    692 	if (current)
    693 	    copyParsedURL(p_url, current);
    694 	goto do_label;
    695     }
    696 #if defined( __EMX__ ) || defined( __CYGWIN__ )
    697     if (!strncmp(url, "file://localhost/", 17)) {
    698 	p_url->scheme = SCM_LOCAL;
    699 	p += 17 - 1;
    700 	url += 17 - 1;
    701     }
    702 #endif
    703 #ifdef SUPPORT_DOS_DRIVE_PREFIX
    704     if (IS_ALPHA(*p) && (p[1] == ':' || p[1] == '|')) {
    705 	p_url->scheme = SCM_LOCAL;
    706 	goto analyze_file;
    707     }
    708 #endif				/* SUPPORT_DOS_DRIVE_PREFIX */
    709     /* search for scheme */
    710     p_url->scheme = getURLScheme(&p);
    711     if (p_url->scheme == SCM_MISSING) {
    712 	/* scheme part is not found in the url. This means either
    713 	 * (a) the url is relative to the current or (b) the url
    714 	 * denotes a filename (therefore the scheme is SCM_LOCAL).
    715 	 */
    716 	if (current) {
    717 	    switch (current->scheme) {
    718 	    case SCM_LOCAL:
    719 	    case SCM_LOCAL_CGI:
    720 		p_url->scheme = SCM_LOCAL;
    721 		break;
    722 	    case SCM_FTP:
    723 	    case SCM_FTPDIR:
    724 		p_url->scheme = SCM_FTP;
    725 		break;
    726 #ifdef USE_NNTP
    727 	    case SCM_NNTP:
    728 	    case SCM_NNTP_GROUP:
    729 		p_url->scheme = SCM_NNTP;
    730 		break;
    731 	    case SCM_NEWS:
    732 	    case SCM_NEWS_GROUP:
    733 		p_url->scheme = SCM_NEWS;
    734 		break;
    735 #endif
    736 	    default:
    737 		p_url->scheme = current->scheme;
    738 		break;
    739 	    }
    740 	}
    741 	else
    742 	    p_url->scheme = SCM_LOCAL;
    743 	p = url;
    744 	if (!strncmp(p, "//", 2)) {
    745 	    /* URL begins with // */
    746 	    /* it means that 'scheme:' is abbreviated */
    747 	    p += 2;
    748 	    goto analyze_url;
    749 	}
    750 	/* the url doesn't begin with '//' */
    751 	goto analyze_file;
    752     }
    753     /* scheme part has been found */
    754     if (p_url->scheme == SCM_UNKNOWN) {
    755 	p_url->file = allocStr(url, -1);
    756 	return;
    757     }
    758     /* get host and port */
    759     if (p[0] != '/' || p[1] != '/') {	/* scheme:foo or scheme:/foo */
    760 	p_url->host = NULL;
    761 	if (p_url->scheme != SCM_UNKNOWN)
    762 	    p_url->port = DefaultPort[p_url->scheme];
    763 	else
    764 	    p_url->port = 0;
    765 	goto analyze_file;
    766     }
    767     /* after here, p begins with // */
    768     if (p_url->scheme == SCM_LOCAL) {	/* file://foo           */
    769 #ifdef __EMX__
    770 	p += 2;
    771 	goto analyze_file;
    772 #else
    773 	if (p[2] == '/' || p[2] == '~'
    774 	    /* <A HREF="file:///foo">file:///foo</A>  or <A HREF="file://~user">file://~user</A> */
    775 #ifdef SUPPORT_DOS_DRIVE_PREFIX
    776 	    || (IS_ALPHA(p[2]) && (p[3] == ':' || p[3] == '|'))
    777 	    /* <A HREF="file://DRIVE/foo">file://DRIVE/foo</A> */
    778 #endif				/* SUPPORT_DOS_DRIVE_PREFIX */
    779 	    ) {
    780 	    p += 2;
    781 	    goto analyze_file;
    782 	}
    783 #endif				/* __EMX__ */
    784     }
    785     p += 2;			/* scheme://foo         */
    786     /*          ^p is here  */
    787   analyze_url:
    788     q = p;
    789 #ifdef INET6
    790     if (*q == '[') {		/* rfc2732,rfc2373 compliance */
    791 	p++;
    792 	while (IS_XDIGIT(*p) || *p == ':' || *p == '.')
    793 	    p++;
    794 	if (*p != ']' || (*(p + 1) && strchr(":/?#", *(p + 1)) == NULL))
    795 	    p = q;
    796     }
    797 #endif
    798     while (*p && strchr(":/@?#", *p) == NULL)
    799 	p++;
    800     switch (*p) {
    801     case ':':
    802 	/* scheme://user:pass@host or
    803 	 * scheme://host:port
    804 	 */
    805 	p_url->host = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
    806 	q = ++p;
    807 	while (*p && strchr("@/?#", *p) == NULL)
    808 	    p++;
    809 	if (*p == '@') {
    810 	    /* scheme://user:pass@...       */
    811 	    p_url->pass = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
    812 	    q = ++p;
    813 	    p_url->user = p_url->host;
    814 	    p_url->host = NULL;
    815 	    goto analyze_url;
    816 	}
    817 	/* scheme://host:port/ */
    818 	tmp = Strnew_charp_n(q, p - q);
    819 	p_url->port = atoi(tmp->ptr);
    820 	/* *p is one of ['\0', '/', '?', '#'] */
    821 	break;
    822     case '@':
    823 	/* scheme://user@...            */
    824 	p_url->user = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
    825 	q = ++p;
    826 	goto analyze_url;
    827     case '\0':
    828 	/* scheme://host                */
    829     case '/':
    830     case '?':
    831     case '#':
    832 	p_url->host = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
    833 	p_url->port = DefaultPort[p_url->scheme];
    834 	break;
    835     }
    836   analyze_file:
    837 #ifndef SUPPORT_NETBIOS_SHARE
    838     if (p_url->scheme == SCM_LOCAL && p_url->user == NULL &&
    839 	p_url->host != NULL && *p_url->host != '\0' &&
    840 	strcmp(p_url->host, "localhost")) {
    841 	/*
    842 	 * In the environments other than CYGWIN, a URL like 
    843 	 * file://host/file is regarded as ftp://host/file.
    844 	 * On the other hand, file://host/file on CYGWIN is
    845 	 * regarded as local access to the file //host/file.
    846 	 * `host' is a netbios-hostname, drive, or any other
    847 	 * name; It is CYGWIN system call who interprets that.
    848 	 */
    849 
    850 	p_url->scheme = SCM_FTP;	/* ftp://host/... */
    851 	if (p_url->port == 0)
    852 	    p_url->port = DefaultPort[SCM_FTP];
    853     }
    854 #endif
    855     if ((*p == '\0' || *p == '#' || *p == '?') && p_url->host == NULL) {
    856 	p_url->file = "";
    857 	goto do_query;
    858     }
    859 #ifdef SUPPORT_DOS_DRIVE_PREFIX
    860     if (p_url->scheme == SCM_LOCAL) {
    861 	q = p;
    862 	if (*q == '/')
    863 	    q++;
    864 	if (IS_ALPHA(q[0]) && (q[1] == ':' || q[1] == '|')) {
    865 	    if (q[1] == '|') {
    866 		p = allocStr(q, -1);
    867 		p[1] = ':';
    868 	    }
    869 	    else
    870 		p = q;
    871 	}
    872     }
    873 #endif
    874 
    875     q = p;
    876 #ifdef USE_GOPHER
    877     if (p_url->scheme == SCM_GOPHER) {
    878 	if (*q == '/')
    879 	    q++;
    880 	if (*q && q[0] != '/' && q[1] != '/' && q[2] == '/')
    881 	    q++;
    882     }
    883 #endif				/* USE_GOPHER */
    884     if (*p == '/')
    885 	p++;
    886     if (*p == '\0' || *p == '#' || *p == '?') {	/* scheme://host[:port]/ */
    887 	p_url->file = DefaultFile(p_url->scheme);
    888 	goto do_query;
    889     }
    890 #ifdef USE_GOPHER
    891     if (p_url->scheme == SCM_GOPHER && *p == 'R') {
    892 	p++;
    893 	tmp = Strnew();
    894 	Strcat_char(tmp, *(p++));
    895 	while (*p && *p != '/')
    896 	    p++;
    897 	Strcat_charp(tmp, p);
    898 	while (*p)
    899 	    p++;
    900 	p_url->file = copyPath(tmp->ptr, -1, COPYPATH_SPC_IGNORE);
    901     }
    902     else
    903 #endif				/* USE_GOPHER */
    904     {
    905 	char *cgi = strchr(p, '?');
    906       again:
    907 	while (*p && *p != '#' && p != cgi)
    908 	    p++;
    909 	if (*p == '#' && p_url->scheme == SCM_LOCAL) {
    910 	    /* 
    911 	     * According to RFC2396, # means the beginning of
    912 	     * URI-reference, and # should be escaped.  But,
    913 	     * if the scheme is SCM_LOCAL, the special
    914 	     * treatment will apply to # for convinience.
    915 	     */
    916 	    if (p > q && *(p - 1) == '/' && (cgi == NULL || p < cgi)) {
    917 		/* 
    918 		 * # comes as the first character of the file name
    919 		 * that means, # is not a label but a part of the file
    920 		 * name.
    921 		 */
    922 		p++;
    923 		goto again;
    924 	    }
    925 	    else if (*(p + 1) == '\0') {
    926 		/* 
    927 		 * # comes as the last character of the file name that
    928 		 * means, # is not a label but a part of the file
    929 		 * name.
    930 		 */
    931 		p++;
    932 	    }
    933 	}
    934 	if (p_url->scheme == SCM_LOCAL || p_url->scheme == SCM_MISSING)
    935 	    p_url->file = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
    936 	else
    937 	    p_url->file = copyPath(q, p - q, COPYPATH_SPC_IGNORE);
    938     }
    939 
    940   do_query:
    941     if (*p == '?') {
    942 	q = ++p;
    943 	while (*p && *p != '#')
    944 	    p++;
    945 	p_url->query = copyPath(q, p - q, COPYPATH_SPC_ALLOW);
    946     }
    947   do_label:
    948     if (p_url->scheme == SCM_MISSING) {
    949 	p_url->scheme = SCM_LOCAL;
    950 	p_url->file = allocStr(p, -1);
    951 	p_url->label = NULL;
    952     }
    953     else if (*p == '#')
    954 	p_url->label = allocStr(p + 1, -1);
    955     else
    956 	p_url->label = NULL;
    957 }
    958 
/* Zero every byte of the ParsedURL that p points to. */
#define initParsedURL(p) bzero(p,sizeof(ParsedURL))
/* Duplicate s with allocStr(); a NULL s stays NULL.  s is evaluated twice. */
#define ALLOC_STR(s) ((s)==NULL?NULL:allocStr(s,-1))
    961 
    962 void
    963 copyParsedURL(ParsedURL *p, ParsedURL *q)
    964 {
    965     p->scheme = q->scheme;
    966     p->port = q->port;
    967     p->is_nocache = q->is_nocache;
    968     p->user = ALLOC_STR(q->user);
    969     p->pass = ALLOC_STR(q->pass);
    970     p->host = ALLOC_STR(q->host);
    971     p->file = ALLOC_STR(q->file);
    972     p->real_file = ALLOC_STR(q->real_file);
    973     p->label = ALLOC_STR(q->label);
    974     p->query = ALLOC_STR(q->query);
    975 }
    976 
    977 void
    978 parseURL2(char *url, ParsedURL *pu, ParsedURL *current)
    979 {
    980     char *p;
    981     Str tmp;
    982     int relative_uri = FALSE;
    983 
    984     parseURL(url, pu, current);
    985 #ifndef USE_W3MMAILER
    986     if (pu->scheme == SCM_MAILTO)
    987 	return;
    988 #endif
    989     if (pu->scheme == SCM_DATA)
    990 	return;
    991     if (pu->scheme == SCM_NEWS || pu->scheme == SCM_NEWS_GROUP) {
    992 	if (pu->file && !strchr(pu->file, '@') &&
    993 	    (!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') ||
    994 	     *(p + 1) == '\0'))
    995 	    pu->scheme = SCM_NEWS_GROUP;
    996 	else
    997 	    pu->scheme = SCM_NEWS;
    998 	return;
    999     }
   1000     if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NNTP_GROUP) {
   1001 	if (pu->file && *pu->file == '/')
   1002 	    pu->file = allocStr(pu->file + 1, -1);
   1003 	if (pu->file && !strchr(pu->file, '@') &&
   1004 	    (!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') ||
   1005 	     *(p + 1) == '\0'))
   1006 	    pu->scheme = SCM_NNTP_GROUP;
   1007 	else
   1008 	    pu->scheme = SCM_NNTP;
   1009 	if (current && (current->scheme == SCM_NNTP ||
   1010 			current->scheme == SCM_NNTP_GROUP)) {
   1011 	    if (pu->host == NULL) {
   1012 		pu->host = current->host;
   1013 		pu->port = current->port;
   1014 	    }
   1015 	}
   1016 	return;
   1017     }
   1018     if (pu->scheme == SCM_LOCAL) {
   1019 	char *q = expandName(file_unquote(pu->file));
   1020 #ifdef SUPPORT_DOS_DRIVE_PREFIX
   1021 	Str drive;
   1022 	if (IS_ALPHA(q[0]) && q[1] == ':') {
   1023 	    drive = Strnew_charp_n(q, 2);
   1024 	    Strcat_charp(drive, file_quote(q+2));
   1025 	    pu->file = drive->ptr;
   1026 	}
   1027 	else
   1028 #endif
   1029 	    pu->file = file_quote(q);
   1030     }
   1031 
   1032     if (current && (pu->scheme == current->scheme ||
   1033 		    (pu->scheme == SCM_FTP && current->scheme == SCM_FTPDIR) ||
   1034 		    (pu->scheme == SCM_LOCAL &&
   1035 		     current->scheme == SCM_LOCAL_CGI))
   1036 	&& pu->host == NULL) {
   1037 	/* Copy omitted element from the current URL */
   1038 	pu->user = current->user;
   1039 	pu->pass = current->pass;
   1040 	pu->host = current->host;
   1041 	pu->port = current->port;
   1042 	if (pu->file && *pu->file) {
   1043 #ifdef USE_EXTERNAL_URI_LOADER
   1044 	    if (pu->scheme == SCM_UNKNOWN
   1045 		&& strchr(pu->file, ':') == NULL
   1046 		&& current && (p = strchr(current->file, ':')) != NULL) {
   1047 		pu->file = Sprintf("%s:%s",
   1048 				   allocStr(current->file,
   1049 					    p - current->file), pu->file)->ptr;
   1050 	    }
   1051 	    else
   1052 #endif
   1053 		if (
   1054 #ifdef USE_GOPHER
   1055 		       pu->scheme != SCM_GOPHER &&
   1056 #endif				/* USE_GOPHER */
   1057 		       pu->file[0] != '/'
   1058 #ifdef SUPPORT_DOS_DRIVE_PREFIX
   1059 		       && !(pu->scheme == SCM_LOCAL && IS_ALPHA(pu->file[0])
   1060 			    && pu->file[1] == ':')
   1061 #endif
   1062 		) {
   1063 		/* file is relative [process 1] */
   1064 		p = pu->file;
   1065 		if (current->file) {
   1066 		    tmp = Strnew_charp(current->file);
   1067 		    while (tmp->length > 0) {
   1068 			if (Strlastchar(tmp) == '/')
   1069 			    break;
   1070 			Strshrink(tmp, 1);
   1071 		    }
   1072 		    Strcat_charp(tmp, p);
   1073 		    pu->file = tmp->ptr;
   1074 		    relative_uri = TRUE;
   1075 		}
   1076 	    }
   1077 #ifdef USE_GOPHER
   1078 	    else if (pu->scheme == SCM_GOPHER && pu->file[0] == '/') {
   1079 		p = pu->file;
   1080 		pu->file = allocStr(p + 1, -1);
   1081 	    }
   1082 #endif				/* USE_GOPHER */
   1083 	}
   1084 	else {			/* scheme:[?query][#label] */
   1085 	    pu->file = current->file;
   1086 	    if (!pu->query)
   1087 		pu->query = current->query;
   1088 	}
   1089 	/* comment: query part need not to be completed
   1090 	 * from the current URL. */
   1091     }
   1092     if (pu->file) {
   1093 #ifdef __EMX__
   1094 	if (pu->scheme == SCM_LOCAL) {
   1095 	    if (strncmp(pu->file, "/$LIB/", 6)) {
   1096 		char abs[_MAX_PATH];
   1097 
   1098 		_abspath(abs, file_unquote(pu->file), _MAX_PATH);
   1099 		pu->file = file_quote(cleanupName(abs));
   1100 	    }
   1101 	}
   1102 #else
   1103 	if (pu->scheme == SCM_LOCAL && pu->file[0] != '/' &&
   1104 #ifdef SUPPORT_DOS_DRIVE_PREFIX	/* for 'drive:' */
   1105 	    !(IS_ALPHA(pu->file[0]) && pu->file[1] == ':') &&
   1106 #endif
   1107 	    strcmp(pu->file, "-")) {
   1108 	    /* local file, relative path */
   1109 	    tmp = Strnew_charp(CurrentDir);
   1110 	    if (Strlastchar(tmp) != '/')
   1111 		Strcat_char(tmp, '/');
   1112 	    Strcat_charp(tmp, file_unquote(pu->file));
   1113 	    pu->file = file_quote(cleanupName(tmp->ptr));
   1114 	}
   1115 #endif
   1116 	else if (pu->scheme == SCM_HTTP
   1117 #ifdef USE_SSL
   1118 		 || pu->scheme == SCM_HTTPS
   1119 #endif
   1120 	    ) {
   1121 	    if (relative_uri) {
   1122 		/* In this case, pu->file is created by [process 1] above.
   1123 		 * pu->file may contain relative path (for example, 
   1124 		 * "/foo/../bar/./baz.html"), cleanupName() must be applied.
   1125 		 * When the entire abs_path is given, it still may contain
   1126 		 * elements like `//', `..' or `.' in the pu->file. It is 
   1127 		 * server's responsibility to canonicalize such path.
   1128 		 */
   1129 		pu->file = cleanupName(pu->file);
   1130 	    }
   1131 	}
   1132 	else if (
   1133 #ifdef USE_GOPHER
   1134 		    pu->scheme != SCM_GOPHER &&
   1135 #endif				/* USE_GOPHER */
   1136 		    pu->file[0] == '/') {
   1137 	    /*
   1138 	     * this happens on the following conditions:
   1139 	     * (1) ftp scheme (2) local, looks like absolute path.
   1140 	     * In both case, there must be no side effect with
   1141 	     * cleanupName(). (I hope so...)
   1142 	     */
   1143 	    pu->file = cleanupName(pu->file);
   1144 	}
   1145 	if (pu->scheme == SCM_LOCAL) {
   1146 #ifdef SUPPORT_NETBIOS_SHARE
   1147 	    if (pu->host && strcmp(pu->host, "localhost") != 0) {
   1148 		Str tmp = Strnew_charp("//");
   1149 		Strcat_m_charp(tmp, pu->host,
   1150 			       cleanupName(file_unquote(pu->file)), NULL);
   1151 		pu->real_file = tmp->ptr;
   1152 	    }
   1153 	    else
   1154 #endif
   1155 		pu->real_file = cleanupName(file_unquote(pu->file));
   1156 	}
   1157     }
   1158 }
   1159 
   1160 static Str
   1161 _parsedURL2Str(ParsedURL *pu, int pass)
   1162 {
   1163     Str tmp;
   1164     static char *scheme_str[] = {
   1165 	"http", "gopher", "ftp", "ftp", "file", "file", "exec", "nntp", "nntp",
   1166 	"news", "news", "data", "mailto",
   1167 #ifdef USE_SSL
   1168 	"https",
   1169 #endif				/* USE_SSL */
   1170     };
   1171 
   1172     if (pu->scheme == SCM_MISSING) {
   1173 	return Strnew_charp("???");
   1174     }
   1175     else if (pu->scheme == SCM_UNKNOWN) {
   1176 	return Strnew_charp(pu->file);
   1177     }
   1178     if (pu->host == NULL && pu->file == NULL && pu->label != NULL) {
   1179 	/* local label */
   1180 	return Sprintf("#%s", pu->label);
   1181     }
   1182     if (pu->scheme == SCM_LOCAL && !strcmp(pu->file, "-")) {
   1183 	tmp = Strnew_charp("-");
   1184 	if (pu->label) {
   1185 	    Strcat_char(tmp, '#');
   1186 	    Strcat_charp(tmp, pu->label);
   1187 	}
   1188 	return tmp;
   1189     }
   1190     tmp = Strnew_charp(scheme_str[pu->scheme]);
   1191     Strcat_char(tmp, ':');
   1192 #ifndef USE_W3MMAILER
   1193     if (pu->scheme == SCM_MAILTO) {
   1194 	Strcat_charp(tmp, pu->file);
   1195 	if (pu->query) {
   1196 	    Strcat_char(tmp, '?');
   1197 	    Strcat_charp(tmp, pu->query);
   1198 	}
   1199 	return tmp;
   1200     }
   1201 #endif
   1202     if (pu->scheme == SCM_DATA) {
   1203 	Strcat_charp(tmp, pu->file);
   1204 	return tmp;
   1205     }
   1206 #ifdef USE_NNTP
   1207     if (pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP)
   1208 #endif				/* USE_NNTP */
   1209     {
   1210 	Strcat_charp(tmp, "//");
   1211     }
   1212     if (pu->user) {
   1213 	Strcat_charp(tmp, pu->user);
   1214 	if (pass && pu->pass) {
   1215 	    Strcat_char(tmp, ':');
   1216 	    Strcat_charp(tmp, pu->pass);
   1217 	}
   1218 	Strcat_char(tmp, '@');
   1219     }
   1220     if (pu->host) {
   1221 	Strcat_charp(tmp, pu->host);
   1222 	if (pu->port != DefaultPort[pu->scheme]) {
   1223 	    Strcat_char(tmp, ':');
   1224 	    Strcat(tmp, Sprintf("%d", pu->port));
   1225 	}
   1226     }
   1227     if (
   1228 #ifdef USE_NNTP
   1229 	   pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP &&
   1230 #endif				/* USE_NNTP */
   1231 	   (pu->file == NULL || (pu->file[0] != '/'
   1232 #ifdef SUPPORT_DOS_DRIVE_PREFIX
   1233 				 && !(IS_ALPHA(pu->file[0])
   1234 				      && pu->file[1] == ':'
   1235 				      && pu->host == NULL)
   1236 #endif
   1237 	    )))
   1238 	Strcat_char(tmp, '/');
   1239     Strcat_charp(tmp, pu->file);
   1240     if (pu->scheme == SCM_FTPDIR && Strlastchar(tmp) != '/')
   1241 	Strcat_char(tmp, '/');
   1242     if (pu->query) {
   1243 	Strcat_char(tmp, '?');
   1244 	Strcat_charp(tmp, pu->query);
   1245     }
   1246     if (pu->label) {
   1247 	Strcat_char(tmp, '#');
   1248 	Strcat_charp(tmp, pu->label);
   1249     }
   1250     return tmp;
   1251 }
   1252 
   1253 Str
   1254 parsedURL2Str(ParsedURL *pu)
   1255 {
   1256     return _parsedURL2Str(pu, FALSE);
   1257 }
   1258 
   1259 int
   1260 getURLScheme(char **url)
   1261 {
   1262     char *p = *url, *q;
   1263     int i;
   1264     int scheme = SCM_MISSING;
   1265 
   1266     while (*p && (IS_ALNUM(*p) || *p == '.' || *p == '+' || *p == '-'))
   1267 	p++;
   1268     if (*p == ':') {		/* scheme found */
   1269 	scheme = SCM_UNKNOWN;
   1270 	for (i = 0; (q = schemetable[i].cmdname) != NULL; i++) {
   1271 	    int len = strlen(q);
   1272 	    if (!strncasecmp(q, *url, len) && (*url)[len] == ':') {
   1273 		scheme = schemetable[i].cmd;
   1274 		*url = p + 1;
   1275 		break;
   1276 	    }
   1277 	}
   1278     }
   1279     return scheme;
   1280 }
   1281 
   1282 static char *
   1283 otherinfo(ParsedURL *target, ParsedURL *current, char *referer)
   1284 {
   1285     Str s = Strnew();
   1286 
   1287     Strcat_charp(s, "User-Agent: ");
   1288     if (UserAgent == NULL || *UserAgent == '\0')
   1289 	Strcat_charp(s, w3m_version);
   1290     else
   1291 	Strcat_charp(s, UserAgent);
   1292     Strcat_charp(s, "\r\n");
   1293 
   1294     Strcat_m_charp(s, "Accept: ", AcceptMedia, "\r\n", NULL);
   1295     Strcat_m_charp(s, "Accept-Encoding: ", AcceptEncoding, "\r\n", NULL);
   1296     Strcat_m_charp(s, "Accept-Language: ", AcceptLang, "\r\n", NULL);
   1297 
   1298     if (target->host) {
   1299 	Strcat_charp(s, "Host: ");
   1300 	Strcat_charp(s, target->host);
   1301 	if (target->port != DefaultPort[target->scheme])
   1302 	    Strcat(s, Sprintf(":%d", target->port));
   1303 	Strcat_charp(s, "\r\n");
   1304     }
   1305     if (target->is_nocache || NoCache) {
   1306 	Strcat_charp(s, "Pragma: no-cache\r\n");
   1307 	Strcat_charp(s, "Cache-control: no-cache\r\n");
   1308     }
   1309     if (!NoSendReferer) {
   1310 #ifdef USE_SSL
   1311         if (current && current->scheme == SCM_HTTPS && target->scheme != SCM_HTTPS) {
   1312 	  /* Don't send Referer: if https:// -> http:// */
   1313 	}
   1314 	else
   1315 #endif
   1316 	if (referer == NULL && current && current->scheme != SCM_LOCAL &&
   1317 	    (current->scheme != SCM_FTP ||
   1318 	     (current->user == NULL && current->pass == NULL))) {
   1319 	    char *p = current->label;
   1320 	    Strcat_charp(s, "Referer: ");
   1321 	    current->label = NULL;
   1322 	    Strcat(s, parsedURL2Str(current));
   1323 	    current->label = p;
   1324 	    Strcat_charp(s, "\r\n");
   1325 	}
   1326 	else if (referer != NULL && referer != NO_REFERER) {
   1327 	    char *p = strchr(referer, '#');
   1328 	    Strcat_charp(s, "Referer: ");
   1329 	    if (p)
   1330 		Strcat_charp_n(s, referer, p - referer);
   1331 	    else
   1332 		Strcat_charp(s, referer);
   1333 	    Strcat_charp(s, "\r\n");
   1334 	}
   1335     }
   1336     return s->ptr;
   1337 }
   1338 
   1339 Str
   1340 HTTPrequestMethod(HRequest *hr)
   1341 {
   1342     switch (hr->command) {
   1343     case HR_COMMAND_CONNECT:
   1344 	return Strnew_charp("CONNECT");
   1345     case HR_COMMAND_POST:
   1346 	return Strnew_charp("POST");
   1347 	break;
   1348     case HR_COMMAND_HEAD:
   1349 	return Strnew_charp("HEAD");
   1350 	break;
   1351     case HR_COMMAND_GET:
   1352     default:
   1353 	return Strnew_charp("GET");
   1354     }
   1355     return NULL;
   1356 }
   1357 
   1358 Str
   1359 HTTPrequestURI(ParsedURL *pu, HRequest *hr)
   1360 {
   1361     Str tmp = Strnew();
   1362     if (hr->command == HR_COMMAND_CONNECT) {
   1363 	Strcat_charp(tmp, pu->host);
   1364 	Strcat(tmp, Sprintf(":%d", pu->port));
   1365     }
   1366     else if (hr->flag & HR_FLAG_LOCAL) {
   1367 	Strcat_charp(tmp, pu->file);
   1368 	if (pu->query) {
   1369 	    Strcat_char(tmp, '?');
   1370 	    Strcat_charp(tmp, pu->query);
   1371 	}
   1372     }
   1373     else {
   1374 	char *save_label = pu->label;
   1375 	pu->label = NULL;
   1376 	Strcat(tmp, _parsedURL2Str(pu, TRUE));
   1377 	pu->label = save_label;
   1378     }
   1379     return tmp;
   1380 }
   1381 
   1382 static Str
   1383 HTTPrequest(ParsedURL *pu, ParsedURL *current, HRequest *hr, TextList *extra)
   1384 {
   1385     Str tmp;
   1386     TextListItem *i;
   1387     int seen_www_auth = 0;
   1388 #ifdef USE_COOKIE
   1389     Str cookie;
   1390 #endif				/* USE_COOKIE */
   1391     tmp = HTTPrequestMethod(hr);
   1392     Strcat_charp(tmp, " ");
   1393     Strcat_charp(tmp, HTTPrequestURI(pu, hr)->ptr);
   1394     Strcat_charp(tmp, " HTTP/1.0\r\n");
   1395     if (hr->referer == NO_REFERER)
   1396 	Strcat_charp(tmp, otherinfo(pu, NULL, NULL));
   1397     else
   1398 	Strcat_charp(tmp, otherinfo(pu, current, hr->referer));
   1399     if (extra != NULL)
   1400 	for (i = extra->first; i != NULL; i = i->next) {
   1401 	    if (strncasecmp(i->ptr, "Authorization:",
   1402 			    sizeof("Authorization:") - 1) == 0) {
   1403 		seen_www_auth = 1;
   1404 #ifdef USE_SSL
   1405 		if (hr->command == HR_COMMAND_CONNECT)
   1406 		    continue;
   1407 #endif
   1408 	    }
   1409 	    if (strncasecmp(i->ptr, "Proxy-Authorization:",
   1410 			    sizeof("Proxy-Authorization:") - 1) == 0) {
   1411 #ifdef USE_SSL
   1412 		if (pu->scheme == SCM_HTTPS
   1413 		    && hr->command != HR_COMMAND_CONNECT)
   1414 		    continue;
   1415 #endif
   1416 	    }
   1417 	    Strcat_charp(tmp, i->ptr);
   1418 	}
   1419 
   1420 #ifdef USE_COOKIE
   1421     if (hr->command != HR_COMMAND_CONNECT &&
   1422 	use_cookie && (cookie = find_cookie(pu))) {
   1423 	Strcat_charp(tmp, "Cookie: ");
   1424 	Strcat(tmp, cookie);
   1425 	Strcat_charp(tmp, "\r\n");
   1426 	/* [DRAFT 12] s. 10.1 */
   1427 	if (cookie->ptr[0] != '$')
   1428 	    Strcat_charp(tmp, "Cookie2: $Version=\"1\"\r\n");
   1429     }
   1430 #endif				/* USE_COOKIE */
   1431     if (hr->command == HR_COMMAND_POST) {
   1432 	if (hr->request->enctype == FORM_ENCTYPE_MULTIPART) {
   1433 	    Strcat_charp(tmp, "Content-type: multipart/form-data; boundary=");
   1434 	    Strcat_charp(tmp, hr->request->boundary);
   1435 	    Strcat_charp(tmp, "\r\n");
   1436 	    Strcat(tmp,
   1437 		   Sprintf("Content-length: %ld\r\n", hr->request->length));
   1438 	    Strcat_charp(tmp, "\r\n");
   1439 	}
   1440 	else {
   1441 	    if (!override_content_type) {
   1442 		Strcat_charp(tmp,
   1443 			     "Content-type: application/x-www-form-urlencoded\r\n");
   1444 	    }
   1445 	    Strcat(tmp,
   1446 		   Sprintf("Content-length: %ld\r\n", hr->request->length));
   1447 	    if (header_string)
   1448 		Strcat(tmp, header_string);
   1449 	    Strcat_charp(tmp, "\r\n");
   1450 	    Strcat_charp_n(tmp, hr->request->body, hr->request->length);
   1451 	    Strcat_charp(tmp, "\r\n");
   1452 	}
   1453     }
   1454     else {
   1455 	if (header_string)
   1456 	    Strcat(tmp, header_string);
   1457 	Strcat_charp(tmp, "\r\n");
   1458     }
   1459 #ifdef DEBUG
   1460     fprintf(stderr, "HTTPrequest: [ %s ]\n\n", tmp->ptr);
   1461 #endif				/* DEBUG */
   1462     return tmp;
   1463 }
   1464 
   1465 void
   1466 init_stream(URLFile *uf, int scheme, InputStream stream)
   1467 {
   1468     memset(uf, 0, sizeof(URLFile));
   1469     uf->stream = stream;
   1470     uf->scheme = scheme;
   1471     uf->encoding = ENC_7BIT;
   1472     uf->is_cgi = FALSE;
   1473     uf->compression = CMP_NOCOMPRESS;
   1474     uf->content_encoding = CMP_NOCOMPRESS;
   1475     uf->guess_type = NULL;
   1476     uf->ext = NULL;
   1477     uf->modtime = -1;
   1478 }
   1479 
   1480 URLFile
   1481 openURL(char *url, ParsedURL *pu, ParsedURL *current,
   1482 	URLOption *option, FormList *request, TextList *extra_header,
   1483 	URLFile *ouf, HRequest *hr, unsigned char *status)
   1484 {
   1485     Str tmp;
   1486     int sock, scheme;
   1487     char *p, *q, *u;
   1488     URLFile uf;
   1489     HRequest hr0;
   1490 #ifdef USE_SSL
   1491     SSL *sslh = NULL;
   1492 #endif				/* USE_SSL */
   1493 
   1494     if (hr == NULL)
   1495 	hr = &hr0;
   1496 
   1497     if (ouf) {
   1498 	uf = *ouf;
   1499     }
   1500     else {
   1501 	init_stream(&uf, SCM_MISSING, NULL);
   1502     }
   1503 
   1504     u = url;
   1505     scheme = getURLScheme(&u);
   1506     if (current == NULL && scheme == SCM_MISSING && !ArgvIsURL)
   1507 	u = file_to_url(url);	/* force to local file */
   1508     else
   1509 	u = url;
   1510   retry:
   1511     parseURL2(u, pu, current);
   1512     if (pu->scheme == SCM_LOCAL && pu->file == NULL) {
   1513 	if (pu->label != NULL) {
   1514 	    /* #hogege is not a label but a filename */
   1515 	    Str tmp2 = Strnew_charp("#");
   1516 	    Strcat_charp(tmp2, pu->label);
   1517 	    pu->file = tmp2->ptr;
   1518 	    pu->real_file = cleanupName(file_unquote(pu->file));
   1519 	    pu->label = NULL;
   1520 	}
   1521 	else {
   1522 	    /* given URL must be null string */
   1523 #ifdef SOCK_DEBUG
   1524 	    sock_log("given URL must be null string\n");
   1525 #endif
   1526 	    return uf;
   1527 	}
   1528     }
   1529 
   1530     uf.scheme = pu->scheme;
   1531     uf.url = parsedURL2Str(pu)->ptr;
   1532     pu->is_nocache = (option->flag & RG_NOCACHE);
   1533     uf.ext = filename_extension(pu->file, 1);
   1534 
   1535     hr->command = HR_COMMAND_GET;
   1536     hr->flag = 0;
   1537     hr->referer = option->referer;
   1538     hr->request = request;
   1539 
   1540     switch (pu->scheme) {
   1541     case SCM_LOCAL:
   1542     case SCM_LOCAL_CGI:
   1543 	if (request && request->body)
   1544 	    /* local CGI: POST */
   1545 	    uf.stream = newFileStream(localcgi_post(pu->real_file, pu->query,
   1546 						    request, option->referer),
   1547 				      (void (*)())fclose);
   1548 	else
   1549 	    /* lodal CGI: GET */
   1550 	    uf.stream = newFileStream(localcgi_get(pu->real_file, pu->query,
   1551 						   option->referer),
   1552 				      (void (*)())fclose);
   1553 	if (uf.stream) {
   1554 	    uf.is_cgi = TRUE;
   1555 	    uf.scheme = pu->scheme = SCM_LOCAL_CGI;
   1556 	    return uf;
   1557 	}
   1558 	examineFile(pu->real_file, &uf);
   1559 	if (uf.stream == NULL) {
   1560 	    if (dir_exist(pu->real_file)) {
   1561 		add_index_file(pu, &uf);
   1562 		if (uf.stream == NULL)
   1563 		    return uf;
   1564 	    }
   1565 	    else if (document_root != NULL) {
   1566 		tmp = Strnew_charp(document_root);
   1567 		if (Strlastchar(tmp) != '/' && pu->file[0] != '/')
   1568 		    Strcat_char(tmp, '/');
   1569 		Strcat_charp(tmp, pu->file);
   1570 		p = cleanupName(tmp->ptr);
   1571 		q = cleanupName(file_unquote(p));
   1572 		if (dir_exist(q)) {
   1573 		    pu->file = p;
   1574 		    pu->real_file = q;
   1575 		    add_index_file(pu, &uf);
   1576 		    if (uf.stream == NULL) {
   1577 			return uf;
   1578 		    }
   1579 		}
   1580 		else {
   1581 		    examineFile(q, &uf);
   1582 		    if (uf.stream) {
   1583 			pu->file = p;
   1584 			pu->real_file = q;
   1585 		    }
   1586 		}
   1587 	    }
   1588 	}
   1589 	if (uf.stream == NULL && retryAsHttp && url[0] != '/') {
   1590 	    if (scheme == SCM_MISSING || scheme == SCM_UNKNOWN) {
   1591 		/* retry it as "http://" */
   1592 		u = Strnew_m_charp("http://", url, NULL)->ptr;
   1593 		goto retry;
   1594 	    }
   1595 	}
   1596 	return uf;
   1597     case SCM_FTP:
   1598     case SCM_FTPDIR:
   1599 	if (pu->file == NULL)
   1600 	    pu->file = allocStr("/", -1);
   1601 	if (non_null(FTP_proxy) &&
   1602 	    !Do_not_use_proxy &&
   1603 	    pu->host != NULL && !check_no_proxy(pu->host)) {
   1604 	    hr->flag |= HR_FLAG_PROXY;
   1605 	    sock = openSocket(FTP_proxy_parsed.host,
   1606 			      schemetable[FTP_proxy_parsed.scheme].cmdname,
   1607 			      FTP_proxy_parsed.port);
   1608 	    if (sock < 0)
   1609 		return uf;
   1610 	    uf.scheme = SCM_HTTP;
   1611 	    tmp = HTTPrequest(pu, current, hr, extra_header);
   1612 	    write(sock, tmp->ptr, tmp->length);
   1613 	}
   1614 	else {
   1615 	    uf.stream = openFTPStream(pu, &uf);
   1616 	    uf.scheme = pu->scheme;
   1617 	    return uf;
   1618 	}
   1619 	break;
   1620     case SCM_HTTP:
   1621 #ifdef USE_SSL
   1622     case SCM_HTTPS:
   1623 #endif				/* USE_SSL */
   1624 	if (pu->file == NULL)
   1625 	    pu->file = allocStr("/", -1);
   1626 	if (request && request->method == FORM_METHOD_POST && request->body)
   1627 	    hr->command = HR_COMMAND_POST;
   1628 	if (request && request->method == FORM_METHOD_HEAD)
   1629 	    hr->command = HR_COMMAND_HEAD;
   1630 	if ((
   1631 #ifdef USE_SSL
   1632 		(pu->scheme == SCM_HTTPS) ? non_null(HTTPS_proxy) :
   1633 #endif				/* USE_SSL */
   1634 		non_null(HTTP_proxy)) && !Do_not_use_proxy &&
   1635 	    pu->host != NULL && !check_no_proxy(pu->host)) {
   1636 	    hr->flag |= HR_FLAG_PROXY;
   1637 #ifdef USE_SSL
   1638 	    if (pu->scheme == SCM_HTTPS && *status == HTST_CONNECT) {
   1639 		sock = ssl_socket_of(ouf->stream);
   1640 		if (!(sslh = openSSLHandle(sock, pu->host,
   1641 					   &uf.ssl_certificate))) {
   1642 		    *status = HTST_MISSING;
   1643 		    return uf;
   1644 		}
   1645 	    }
   1646 	    else if (pu->scheme == SCM_HTTPS) {
   1647 		sock = openSocket(HTTPS_proxy_parsed.host,
   1648 				  schemetable[HTTPS_proxy_parsed.scheme].
   1649 				  cmdname, HTTPS_proxy_parsed.port);
   1650 		sslh = NULL;
   1651 	    }
   1652 	    else {
   1653 #endif				/* USE_SSL */
   1654 		sock = openSocket(HTTP_proxy_parsed.host,
   1655 				  schemetable[HTTP_proxy_parsed.scheme].
   1656 				  cmdname, HTTP_proxy_parsed.port);
   1657 #ifdef USE_SSL
   1658 		sslh = NULL;
   1659 	    }
   1660 #endif				/* USE_SSL */
   1661 	    if (sock < 0) {
   1662 #ifdef SOCK_DEBUG
   1663 		sock_log("Can't open socket\n");
   1664 #endif
   1665 		return uf;
   1666 	    }
   1667 #ifdef USE_SSL
   1668 	    if (pu->scheme == SCM_HTTPS) {
   1669 		if (*status == HTST_NORMAL) {
   1670 		    hr->command = HR_COMMAND_CONNECT;
   1671 		    tmp = HTTPrequest(pu, current, hr, extra_header);
   1672 		    *status = HTST_CONNECT;
   1673 		}
   1674 		else {
   1675 		    hr->flag |= HR_FLAG_LOCAL;
   1676 		    tmp = HTTPrequest(pu, current, hr, extra_header);
   1677 		    *status = HTST_NORMAL;
   1678 		}
   1679 	    }
   1680 	    else
   1681 #endif				/* USE_SSL */
   1682 	    {
   1683 		tmp = HTTPrequest(pu, current, hr, extra_header);
   1684 		*status = HTST_NORMAL;
   1685 	    }
   1686 	}
   1687 	else {
   1688 	    sock = openSocket(pu->host,
   1689 			      schemetable[pu->scheme].cmdname, pu->port);
   1690 	    if (sock < 0) {
   1691 		*status = HTST_MISSING;
   1692 		return uf;
   1693 	    }
   1694 #ifdef USE_SSL
   1695 	    if (pu->scheme == SCM_HTTPS) {
   1696 		if (!(sslh = openSSLHandle(sock, pu->host,
   1697 					   &uf.ssl_certificate))) {
   1698 		    *status = HTST_MISSING;
   1699 		    return uf;
   1700 		}
   1701 	    }
   1702 #endif				/* USE_SSL */
   1703 	    hr->flag |= HR_FLAG_LOCAL;
   1704 	    tmp = HTTPrequest(pu, current, hr, extra_header);
   1705 	    *status = HTST_NORMAL;
   1706 	}
   1707 #ifdef USE_SSL
   1708 	if (pu->scheme == SCM_HTTPS) {
   1709 	    uf.stream = newSSLStream(sslh, sock);
   1710 	    if (sslh)
   1711 		SSL_write(sslh, tmp->ptr, tmp->length);
   1712 	    else
   1713 		write(sock, tmp->ptr, tmp->length);
   1714 	    if(w3m_reqlog){
   1715 		FILE *ff = fopen(w3m_reqlog, "a");
   1716 		if (sslh)
   1717 		    fputs("HTTPS: request via SSL\n", ff);
   1718 		else
   1719 		    fputs("HTTPS: request without SSL\n", ff);
   1720 		fwrite(tmp->ptr, sizeof(char), tmp->length, ff);
   1721 		fclose(ff);
   1722 	    }
   1723 	    if (hr->command == HR_COMMAND_POST &&
   1724 		request->enctype == FORM_ENCTYPE_MULTIPART) {
   1725 		if (sslh)
   1726 		    SSL_write_from_file(sslh, request->body);
   1727 		else
   1728 		    write_from_file(sock, request->body);
   1729 	    }
   1730 	    return uf;
   1731 	}
   1732 	else
   1733 #endif				/* USE_SSL */
   1734 	{
   1735 	    write(sock, tmp->ptr, tmp->length);
   1736 	    if(w3m_reqlog){
   1737 		FILE *ff = fopen(w3m_reqlog, "a");
   1738 		fwrite(tmp->ptr, sizeof(char), tmp->length, ff);
   1739 		fclose(ff);
   1740 	    }
   1741 	    if (hr->command == HR_COMMAND_POST &&
   1742 		request->enctype == FORM_ENCTYPE_MULTIPART)
   1743 		write_from_file(sock, request->body);
   1744 	}
   1745 	break;
   1746 #ifdef USE_GOPHER
   1747     case SCM_GOPHER:
   1748 	if (non_null(GOPHER_proxy) &&
   1749 	    !Do_not_use_proxy &&
   1750 	    pu->host != NULL && !check_no_proxy(pu->host)) {
   1751 	    hr->flag |= HR_FLAG_PROXY;
   1752 	    sock = openSocket(GOPHER_proxy_parsed.host,
   1753 			      schemetable[GOPHER_proxy_parsed.scheme].cmdname,
   1754 			      GOPHER_proxy_parsed.port);
   1755 	    if (sock < 0)
   1756 		return uf;
   1757 	    uf.scheme = SCM_HTTP;
   1758 	    tmp = HTTPrequest(pu, current, hr, extra_header);
   1759 	}
   1760 	else {
   1761 	    sock = openSocket(pu->host,
   1762 			      schemetable[pu->scheme].cmdname, pu->port);
   1763 	    if (sock < 0)
   1764 		return uf;
   1765 	    if (pu->file == NULL)
   1766 		pu->file = "1";
   1767 	    tmp = Strnew_charp(file_unquote(pu->file));
   1768 	    Strcat_char(tmp, '\n');
   1769 	}
   1770 	write(sock, tmp->ptr, tmp->length);
   1771 	break;
   1772 #endif				/* USE_GOPHER */
   1773 #ifdef USE_NNTP
   1774     case SCM_NNTP:
   1775     case SCM_NNTP_GROUP:
   1776     case SCM_NEWS:
   1777     case SCM_NEWS_GROUP:
   1778 	if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NEWS)
   1779 	    uf.scheme = SCM_NEWS;
   1780 	else
   1781 	    uf.scheme = SCM_NEWS_GROUP;
   1782 	uf.stream = openNewsStream(pu);
   1783 	return uf;
   1784 #endif				/* USE_NNTP */
   1785     case SCM_DATA:
   1786 	if (pu->file == NULL)
   1787 	    return uf;
   1788 	p = Strnew_charp(pu->file)->ptr;
   1789 	q = strchr(p, ',');
   1790 	if (q == NULL)
   1791 	    return uf;
   1792 	*q++ = '\0';
   1793 	tmp = Strnew_charp(q);
   1794 	q = strrchr(p, ';');
   1795 	if (q != NULL && !strcmp(q, ";base64")) {
   1796 	    *q = '\0';
   1797 	    uf.encoding = ENC_BASE64;
   1798 	}
   1799 	else
   1800 	    tmp = Str_url_unquote(tmp, FALSE, FALSE);
   1801 	uf.stream = newStrStream(tmp);
   1802 	uf.guess_type = (*p != '\0') ? p : "text/plain";
   1803 	return uf;
   1804     case SCM_UNKNOWN:
   1805     default:
   1806 	return uf;
   1807     }
   1808     uf.stream = newInputStream(sock);
   1809     return uf;
   1810 }
   1811 
   1812 /* add index_file if exists */
   1813 static void
   1814 add_index_file(ParsedURL *pu, URLFile *uf)
   1815 {
   1816     char *p, *q;
   1817     TextList *index_file_list = NULL;
   1818     TextListItem *ti;
   1819 
   1820     if (non_null(index_file))
   1821 	index_file_list = make_domain_list(index_file);
   1822     if (index_file_list == NULL) {
   1823 	uf->stream = NULL;
   1824 	return;
   1825     }
   1826     for (ti = index_file_list->first; ti; ti = ti->next) {
   1827 	p = Strnew_m_charp(pu->file, "/", file_quote(ti->ptr), NULL)->ptr;
   1828 	p = cleanupName(p);
   1829 	q = cleanupName(file_unquote(p));
   1830 	examineFile(q, uf);
   1831 	if (uf->stream != NULL) {
   1832 	    pu->file = p;
   1833 	    pu->real_file = q;
   1834 	    return;
   1835 	}
   1836     }
   1837 }
   1838 
   1839 static char *
   1840 guessContentTypeFromTable(struct table2 *table, char *filename)
   1841 {
   1842     struct table2 *t;
   1843     char *p;
   1844     if (table == NULL)
   1845 	return NULL;
   1846     p = &filename[strlen(filename) - 1];
   1847     while (filename < p && *p != '.')
   1848 	p--;
   1849     if (p == filename)
   1850 	return NULL;
   1851     p++;
   1852     for (t = table; t->item1; t++) {
   1853 	if (!strcmp(p, t->item1))
   1854 	    return t->item2;
   1855     }
   1856     for (t = table; t->item1; t++) {
   1857 	if (!strcasecmp(p, t->item1))
   1858 	    return t->item2;
   1859     }
   1860     return NULL;
   1861 }
   1862 
   1863 char *
   1864 guessContentType(char *filename)
   1865 {
   1866     char *ret;
   1867     int i;
   1868 
   1869     if (filename == NULL)
   1870 	return NULL;
   1871     if (mimetypes_list == NULL)
   1872 	goto no_user_mimetypes;
   1873 
   1874     for (i = 0; i < mimetypes_list->nitem; i++) {
   1875 	if ((ret =
   1876 	     guessContentTypeFromTable(UserMimeTypes[i], filename)) != NULL)
   1877 	    return ret;
   1878     }
   1879 
   1880   no_user_mimetypes:
   1881     return guessContentTypeFromTable(DefaultGuess, filename);
   1882 }
   1883 
   1884 TextList *
   1885 make_domain_list(char *domain_list)
   1886 {
   1887     char *p;
   1888     Str tmp;
   1889     TextList *domains = NULL;
   1890 
   1891     p = domain_list;
   1892     tmp = Strnew_size(64);
   1893     while (*p) {
   1894 	while (*p && IS_SPACE(*p))
   1895 	    p++;
   1896 	Strclear(tmp);
   1897 	while (*p && !IS_SPACE(*p) && *p != ',')
   1898 	    Strcat_char(tmp, *p++);
   1899 	if (tmp->length > 0) {
   1900 	    if (domains == NULL)
   1901 		domains = newTextList();
   1902 	    pushText(domains, tmp->ptr);
   1903 	}
   1904 	while (*p && IS_SPACE(*p))
   1905 	    p++;
   1906 	if (*p == ',')
   1907 	    p++;
   1908     }
   1909     return domains;
   1910 }
   1911 
   /*
    * Return 1 when `domain' equals `pat' or ends in a dot-separated suffix
    * equal to `pat' (compared case-insensitively), and 0 otherwise.  One
    * leading '.' of `pat' is ignored.  A NULL `domain' never matches.
    */
   static int
   domain_match(char *pat, char *domain)
   {
       char *tail = domain;

       if (tail == NULL)
           return 0;
       if (*pat == '.')
           pat += 1;
       /* compare against the whole name, then against what follows each
        * successive '.' */
       while (strcasecmp(pat, tail) != 0) {
           char *dot = strchr(tail, '.');

           if (dot == NULL)
               return 0;
           tail = dot + 1;
       }
       return 1;
   }
   1928 
   /*
    * Decide whether `domain' is covered by the NO_proxy_domains list.
    *
    * Returns 0 when the list is NULL or empty or when `domain' is NULL.
    * Returns 1 when any list entry matches by name (domain_match()).
    * If nothing matched by name and NOproxy_netaddr is not set, returns 0.
    * Otherwise `domain' is resolved and each resulting address is turned
    * into text; the result is 1 as soon as some list entry is a prefix of
    * that text (strncmp() over the length of the entry), else 0.
    *
    * NOTE(review): TRAP_ON / TRAP_OFF and SETJMP are macros defined
    * elsewhere; presumably they let an interrupt during the lookup jump
    * back through AbortLoading -- confirm against their definitions.
    */
   1929 int
   1930 check_no_proxy(char *domain)
   1931 {
   1932     TextListItem *tl;
   1933     volatile int ret = 0;
   1934     MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL;
   1935 
   1936     if (NO_proxy_domains == NULL || NO_proxy_domains->nitem == 0 ||
   1937 	domain == NULL)
   1938 	return 0;
            /* first pass: compare by name */
   1939     for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
   1940 	if (domain_match(tl->ptr, domain))
   1941 	    return 1;
   1942     }
   1943     if (!NOproxy_netaddr) {
   1944 	return 0;
   1945     }
   1946     /* 
   1947      * to check noproxy by network addr
   1948      */
            /* a non-zero return from SETJMP gives the result 0 */
   1949     if (SETJMP(AbortLoading) != 0) {
   1950 	ret = 0;
   1951 	goto end;
   1952     }
   1953     TRAP_ON;
   1954     {
   1955 #ifndef INET6
   1956 	struct hostent *he;
   1957 	int n;
   1958 	unsigned char **h_addr_list;
   1959 	char addr[4 * 16], buf[5];
   1960 
   1961 	he = gethostbyname(domain);
   1962 	if (!he) {
   1963 	    ret = 0;
   1964 	    goto end;
   1965 	}
   1966 	for (h_addr_list = (unsigned char **)he->h_addr_list; *h_addr_list;
   1967 	     h_addr_list++) {
                    /* format the address bytes as decimal numbers joined by '.' */
   1968 	    sprintf(addr, "%d", h_addr_list[0][0]);
   1969 	    for (n = 1; n < he->h_length; n++) {
   1970 		sprintf(buf, ".%d", h_addr_list[0][n]);
   1971 		strcat(addr, buf);
   1972 	    }
                    /* a list entry matches when it is a prefix of the address text */
   1973 	    for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
   1974 		if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) {
   1975 		    ret = 1;
   1976 		    goto end;
   1977 		}
   1978 	    }
   1979 	}
   1980 #else				/* INET6 */
   1981 	int error;
   1982 	struct addrinfo hints;
   1983 	struct addrinfo *res, *res0;
   1984 	char addr[4 * 16];
   1985 	int *af;
   1986 
                /* walk the address families of the row selected by DNS_order;
                 * the loop ends at the first PF_UNSPEC entry */
   1987 	for (af = ai_family_order_table[DNS_order];; af++) {
   1988 	    memset(&hints, 0, sizeof(hints));
   1989 	    hints.ai_family = *af;
   1990 	    error = getaddrinfo(domain, NULL, &hints, &res0);
   1991 	    if (error) {
   1992 		if (*af == PF_UNSPEC) {
   1993 		    break;
   1994 		}
   1995 		/* try next */
   1996 		continue;
   1997 	    }
   1998 	    for (res = res0; res != NULL; res = res->ai_next) {
   1999 		switch (res->ai_family) {
   2000 		case AF_INET:
   2001 		    inet_ntop(AF_INET,
   2002 			      &((struct sockaddr_in *)res->ai_addr)->sin_addr,
   2003 			      addr, sizeof(addr));
   2004 		    break;
   2005 		case AF_INET6:
   2006 		    inet_ntop(AF_INET6,
   2007 			      &((struct sockaddr_in6 *)res->ai_addr)->
   2008 			      sin6_addr, addr, sizeof(addr));
   2009 		    break;
   2010 		default:
   2011 		    /* unknown */
   2012 		    continue;
   2013 		}
                        /* a list entry matches when it is a prefix of the address text */
   2014 		for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) {
   2015 		    if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) {
   2016 			freeaddrinfo(res0);
   2017 			ret = 1;
   2018 			goto end;
   2019 		    }
   2020 		}
   2021 	    }
   2022 	    freeaddrinfo(res0);
   2023 	    if (*af == PF_UNSPEC) {
   2024 		break;
   2025 	    }
   2026 	}
   2027 #endif				/* INET6 */
   2028     }
   2029   end:
   2030     TRAP_OFF;
   2031     return ret;
   2032 }
   2033 
   2034 char *
   2035 filename_extension(char *path, int is_url)
   2036 {
   2037     char *last_dot = "", *p = path;
   2038     int i;
   2039 
   2040     if (path == NULL)
   2041 	return last_dot;
   2042     if (*p == '.')
   2043 	p++;
   2044     for (; *p; p++) {
   2045 	if (*p == '.') {
   2046 	    last_dot = p;
   2047 	}
   2048 	else if (is_url && *p == '?')
   2049 	    break;
   2050     }
   2051     if (*last_dot == '.') {
   2052 	for (i = 1; last_dot[i] && i < 8; i++) {
   2053 	    if (is_url && !IS_ALNUM(last_dot[i]))
   2054 		break;
   2055 	}
   2056 	return allocStr(last_dot, i);
   2057     }
   2058     else
   2059 	return last_dot;
   2060 }
   2061 
   2062 #ifdef USE_EXTERNAL_URI_LOADER
   2063 static struct table2 **urimethods;
   2064 static struct table2 default_urimethods[] = {
   2065     {"mailto", "file:///$LIB/w3mmail.cgi?%s"},
   2066     {NULL, NULL}
   2067 };
   2068 
   2069 static struct table2 *
   2070 loadURIMethods(char *filename)
   2071 {
   2072     FILE *f;
   2073     int i, n;
   2074     Str tmp;
   2075     struct table2 *um;
   2076     char *up, *p;
   2077 
   2078     f = fopen(expandPath(filename), "r");
   2079     if (f == NULL)
   2080 	return NULL;
   2081     i = 0;
   2082     while (tmp = Strfgets(f), tmp->length > 0) {
   2083 	if (tmp->ptr[0] != '#')
   2084 	    i++;
   2085     }
   2086     fseek(f, 0, 0);
   2087     n = i;
   2088     um = New_N(struct table2, n + 1);
   2089     i = 0;
   2090     while (tmp = Strfgets(f), tmp->length > 0) {
   2091 	if (tmp->ptr[0] == '#')
   2092 	    continue;
   2093 	while (IS_SPACE(Strlastchar(tmp)))
   2094 	    Strshrink(tmp, 1);
   2095 	for (up = p = tmp->ptr; *p != '\0'; p++) {
   2096 	    if (*p == ':') {
   2097 		um[i].item1 = Strnew_charp_n(up, p - up)->ptr;
   2098 		p++;
   2099 		break;
   2100 	    }
   2101 	}
   2102 	if (*p == '\0')
   2103 	    continue;
   2104 	while (*p != '\0' && IS_SPACE(*p))
   2105 	    p++;
   2106 	um[i].item2 = Strnew_charp(p)->ptr;
   2107 	i++;
   2108     }
   2109     um[i].item1 = NULL;
   2110     um[i].item2 = NULL;
   2111     fclose(f);
   2112     return um;
   2113 }
   2114 
   2115 void
   2116 initURIMethods()
   2117 {
   2118     TextList *methodmap_list = NULL;
   2119     TextListItem *tl;
   2120     int i;
   2121 
   2122     if (non_null(urimethodmap_files))
   2123 	methodmap_list = make_domain_list(urimethodmap_files);
   2124     if (methodmap_list == NULL)
   2125 	return;
   2126     urimethods = New_N(struct table2 *, (methodmap_list->nitem + 1));
   2127     for (i = 0, tl = methodmap_list->first; tl; tl = tl->next) {
   2128 	urimethods[i] = loadURIMethods(tl->ptr);
   2129 	if (urimethods[i])
   2130 	    i++;
   2131     }
   2132     urimethods[i] = NULL;
   2133 }
   2134 
   2135 Str
   2136 searchURIMethods(ParsedURL *pu)
   2137 {
   2138     struct table2 *ump;
   2139     int i;
   2140     Str scheme = NULL;
   2141     Str url;
   2142     char *p;
   2143 
   2144     if (pu->scheme != SCM_UNKNOWN)
   2145 	return NULL;		/* use internal */
   2146     if (urimethods == NULL)
   2147 	return NULL;
   2148     url = parsedURL2Str(pu);
   2149     for (p = url->ptr; *p != '\0'; p++) {
   2150 	if (*p == ':') {
   2151 	    scheme = Strnew_charp_n(url->ptr, p - url->ptr);
   2152 	    break;
   2153 	}
   2154     }
   2155     if (scheme == NULL)
   2156 	return NULL;
   2157 
   2158     /*
   2159      * RFC2396 3.1. Scheme Component
   2160      * For resiliency, programs interpreting URI should treat upper case
   2161      * letters as equivalent to lower case in scheme names (e.g., allow
   2162      * "HTTP" as well as "http").
   2163      */
   2164     for (i = 0; (ump = urimethods[i]) != NULL; i++) {
   2165 	for (; ump->item1 != NULL; ump++) {
   2166 	    if (strcasecmp(ump->item1, scheme->ptr) == 0) {
   2167 		return Sprintf(ump->item2, url_quote(url->ptr));
   2168 	    }
   2169 	}
   2170     }
   2171     for (ump = default_urimethods; ump->item1 != NULL; ump++) {
   2172 	if (strcasecmp(ump->item1, scheme->ptr) == 0) {
   2173 	    return Sprintf(ump->item2, url_quote(url->ptr));
   2174 	}
   2175     }
   2176     return NULL;
   2177 }
   2178 
   2179 /*
   2180  * RFC2396: Uniform Resource Identifiers (URI): Generic Syntax
   2181  * Appendix A. Collected BNF for URI
   2182  * uric          = reserved | unreserved | escaped
   2183  * reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
   2184  *                 "$" | ","
   2185  * unreserved    = alphanum | mark
   2186  * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
   2187  *                  "(" | ")"
   2188  * escaped       = "%" hex hex
   2189  */
   2190 
   2191 #define URI_PATTERN	"([-;/?:@&=+$,a-zA-Z0-9_.!~*'()]|%[0-9A-Fa-f][0-9A-Fa-f])*"
   2192 void
   2193 chkExternalURIBuffer(Buffer *buf)
   2194 {
   2195     int i;
   2196     struct table2 *ump;
   2197 
   2198     for (i = 0; (ump = urimethods[i]) != NULL; i++) {
   2199 	for (; ump->item1 != NULL; ump++) {
   2200 	    reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr);
   2201 	}
   2202     }
   2203     for (ump = default_urimethods; ump->item1 != NULL; ump++) {
   2204 	reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr);
   2205     }
   2206 }
   2207 #endif
   2208 
   2209 ParsedURL *
   2210 schemeToProxy(int scheme)
   2211 {
   2212     ParsedURL *pu = NULL;	/* for gcc */
   2213     switch (scheme) {
   2214     case SCM_HTTP:
   2215 	pu = &HTTP_proxy_parsed;
   2216 	break;
   2217 #ifdef USE_SSL
   2218     case SCM_HTTPS:
   2219 	pu = &HTTPS_proxy_parsed;
   2220 	break;
   2221 #endif
   2222     case SCM_FTP:
   2223 	pu = &FTP_proxy_parsed;
   2224 	break;
   2225 #ifdef USE_GOPHER
   2226     case SCM_GOPHER:
   2227 	pu = &GOPHER_proxy_parsed;
   2228 	break;
   2229 #endif
   2230 #ifdef DEBUG
   2231     default:
   2232 	abort();
   2233 #endif
   2234     }
   2235     return pu;
   2236 }