w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

regex.c (18331B)


      1 /* $Id$ */
      2 /* 
      3  * regex: Regular expression pattern match library
      4  * 
      5  * by A.ITO, December 1989
      6  * Revised by A.ITO, January 2002
      7  */
      8 
      9 #ifdef REGEX_DEBUG
     10 #include <sys/types.h>
     11 #include <malloc.h>
     12 #endif				/* REGEX_DEBUG */
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <string.h>
     16 #include <gc.h>
     17 #include "config.h"
     18 #ifdef USE_M17N
     19 #include "wc.h"
     20 #include "wtf.h"
     21 #ifdef USE_UNICODE
     22 #include "ucs.h"
     23 #endif
     24 #endif
     25 #include "regex.h"
     26 #include "config.h"
     27 #include "myctype.h"
     28 
     29 #ifndef NULL
     30 #define NULL	0
     31 #endif				/* not NULL */
     32 
     /* Value stored in iter_limit for elements flagged RE_ANYTIME. */
     33 #define RE_ITER_LIMIT   65535
     34 
     /*
      * Kind of a compiled pattern element.  The kind lives in the low three
      * bits of regexchar.mode (mask RE_MATCHMODE); the remaining bits hold the
      * RE_OPT / RE_ANYTIME / RE_IGNCASE flags below.
      */
     35 #define RE_MATCHMODE	0x07
     36 #define	RE_NORMAL	0x00	/* one literal character */
     37 #define RE_ANY		0x01	/* '.' */
     38 #define RE_WHICH	0x02	/* '[...]' character class */
     39 #define RE_EXCEPT	0x03	/* '[^...]' negated character class */
     40 #define RE_SUBREGEX     0x04	/* '(...)' group, compiled into p.sub */
     41 #define RE_BEGIN	0x05	/* '^' */
     42 #define RE_END		0x06	/* '$' */
     43 #define RE_ENDMARK	0x07	/* terminates an element array */
     44 
     /* Flag bits OR-ed into regexchar.mode next to the kind. */
     45 #define RE_OPT          0x08	/* set by '?' */
     46 #define RE_ANYTIME	0x10	/* set by '*' and '+' */
     47 #define RE_IGNCASE      0x40	/* set when compiling with igncase */
     48 
     /* RE_MODE reads the kind; RE_SET_MODE replaces it and keeps the flag bits. */
     49 #define RE_MODE(x)      ((x)->mode&RE_MATCHMODE)
     50 #define RE_SET_MODE(x,v) ((x)->mode = (((x)->mode&~RE_MATCHMODE)|((v)&RE_MATCHMODE)))
     51 
     /* Debug helpers, only declared when REGEX_DEBUG is defined. */
     52 #ifdef REGEX_DEBUG
     53 void debugre(regexchar *);
     54 char *lc2c(longchar *, int);
     55 int verbose;
     56 #endif				/* REGEX_DEBUG */
     57 
     /*
      * get_mclen(c): number of bytes the matcher advances for the character at
      * c.  Always 1 without USE_M17N; with it the value comes from wtf_len1()
      * (presumably the encoded length of the character -- confirm in wtf.h).
      */
     58 #ifdef USE_M17N
     59 #define get_mclen(c) wtf_len1((wc_uchar *)(c))
     60 #else
     61 #define get_mclen(c) 1
     62 #endif
     63 
     /* Fall back to <ctype.h> when no TOLOWER macro has been defined yet. */
     64 #ifndef TOLOWER
     65 #include <ctype.h>
     66 #define TOLOWER(x) tolower(x)
     67 #define TOUPPER(x) toupper(x)
     68 #endif
     69 
     /* Values of longchar.type. */
     70 #define RE_TYPE_END     0	/* terminates the item list of a character class */
     71 #define RE_TYPE_CHAR    1	/* single byte, value in .ch */
     72 #define RE_TYPE_WCHAR_T 2	/* multibyte character, value in .wch */
     73 #define RE_WHICH_RANGE  3	/* the '-' between the two ends of a class range */
     74 #define RE_TYPE_SYMBOL  4	/* WC_CCS_SPECIAL character; regmatch1() never matches it */
     75 
     76 static longchar
     77 set_longchar(char *str)
     78 {
     79     unsigned char *p = (unsigned char *)str;
     80     longchar r;
     81 
     82 #ifdef USE_M17N
     83     if (*p & 0x80) {
     84 	r.wch = wtf_parse1(&p);
     85 	if (r.wch.ccs == WC_CCS_SPECIAL || r.wch.ccs == WC_CCS_SPECIAL_W) {
     86 	    r.type = RE_TYPE_SYMBOL;
     87 	    return r;
     88 	}
     89 #ifdef USE_UNICODE
     90 	if (WC_CCS_IS_UNICODE(r.wch.ccs)) {
     91 	    if (WC_CCS_SET(r.wch.ccs) == WC_CCS_UCS_TAG)
     92 		r.wch.code = wc_ucs_tag_to_ucs(r.wch.code);
     93 	    r.wch.ccs = WC_CCS_UCS4;
     94 	}
     95 	else
     96 #endif
     97 	    r.wch.ccs = WC_CCS_SET(r.wch.ccs);
     98 	r.type = RE_TYPE_WCHAR_T;
     99 	return r;
    100     }
    101 #endif
    102     r.ch = *p;
    103     r.type = RE_TYPE_CHAR;
    104     return r;
    105 }
    106 
    /*
     * Regex shared by the argument-less entry points regexCompile(),
     * regexMatch() and matchedPosition(); CompiledRegex and Cstorage are
     * shorthands for its element array and its longchar storage.
     */
    107 static Regex DefaultRegex;
    108 #define CompiledRegex DefaultRegex.re
    109 #define Cstorage DefaultRegex.storage
    110 
    /* Matching helpers defined further down in this file. */
    111 static int regmatch(regexchar *, char *, char *, int, char **);
    112 static int regmatch1(regexchar *, longchar *);
    113 static int matchWhich(longchar *, longchar *, int);
    114 static int match_longchar(longchar *, longchar *, int);
    115 static int match_range_longchar(longchar *, longchar *, longchar *, int);
    116 
    117 /* 
    118  * regexCompile: compile regular expression
    119  */
    120 char *
    121 regexCompile(char *ex, int igncase)
    122 {
    123     char *msg;
    124     newRegex(ex, igncase, &DefaultRegex, &msg);
    125     return msg;
    126 }
    127 
    128 static Regex *
    129 newRegex0(char **ex, int igncase, Regex *regex, char **msg, int level)
    130 {
    131     char *p;
    132     longchar *r;
    133     regexchar *re;
    134     int m;
    135     longchar *st_ptr;
    136 
    137     if (regex == NULL)
    138 	regex = (Regex *)GC_malloc(sizeof(Regex));
    139     regex->alt_regex = NULL;
    140     re = regex->re;
    141     st_ptr = regex->storage;
    142     for (p = *ex; *p != '\0'; p++) {
    143 	re->mode = 0;
    144 	switch (*p) {
    145 	case '.':
    146 	    re->p.pattern = NULL;
    147 	    RE_SET_MODE(re, RE_ANY);
    148 	    re++;
    149 	    break;
    150 	case '$':
    151 	    re->p.pattern = NULL;
    152 	    RE_SET_MODE(re, RE_END);
    153 	    re++;
    154 	    break;
    155 	case '^':
    156 	    re->p.pattern = NULL;
    157 	    RE_SET_MODE(re, RE_BEGIN);
    158 	    re++;
    159 	    break;
    160 	case '+':
    161 	    if (re == regex->re ||
    162 		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
    163 		if (msg)
    164 		    *msg = "Invalid regular expression";
    165 		return NULL;
    166 	    }
    167 	    *re = *(re - 1);
    168 	    re->mode |= RE_ANYTIME;
    169 	    re++;
    170 	    break;
    171 	case '*':
    172 	    if (re == regex->re ||
    173 		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
    174 		if (msg)
    175 		    *msg = "Invalid regular expression";
    176 		return NULL;
    177 	    }
    178 	    (re - 1)->mode |= RE_ANYTIME;
    179 	    break;
    180 	case '?':
    181 	    if (re == regex->re ||
    182 		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
    183 		if (msg)
    184 		    *msg = "Invalid regular expression";
    185 		return NULL;
    186 	    }
    187 	    (re - 1)->mode |= RE_OPT;
    188 	    break;
    189 	case '[':
    190 	    r = st_ptr;
    191 	    if (*++p == '^') {
    192 		p++;
    193 		m = RE_EXCEPT;
    194 	    }
    195 	    else
    196 		m = RE_WHICH;
    197 	    if (*p == '-' || *p == ']')
    198 		*(st_ptr++) = set_longchar(p);
    199 	    while (*p != ']') {
    200 		if (*p == '\\') {
    201 		    p++;
    202 		    *(st_ptr++) = set_longchar(p);
    203 		    p += get_mclen(p);
    204 		}
    205 		else if (*p == '-' && *(p + 1) != ']') {
    206 		    (st_ptr++)->type = RE_WHICH_RANGE;
    207 		    p++;
    208 		}
    209 		else if (*p == '\0') {
    210 		    if (msg)
    211 			*msg = "Missing ]";
    212 		    return NULL;
    213 		}
    214 		else {
    215 		    *(st_ptr++) = set_longchar(p);
    216 		    p += get_mclen(p);
    217 		}
    218 		if (st_ptr >= &regex->storage[STORAGE_MAX]) {
    219 		    if (msg)
    220 			*msg = "Regular expression too long";
    221 		    return NULL;
    222 		}
    223 	    }
    224 	    (st_ptr++)->type = RE_TYPE_END;
    225 	    re->p.pattern = r;
    226 	    RE_SET_MODE(re, m);
    227 	    if (igncase)
    228 		re->mode |= RE_IGNCASE;
    229 	    re++;
    230 	    break;
    231 	case '|':
    232 	    RE_SET_MODE(re, RE_ENDMARK);
    233 	    re++;
    234 	    p++;
    235 	    regex->alt_regex = newRegex0(&p, igncase, NULL, msg, level);
    236 	    if (regex->alt_regex == NULL)
    237 		return NULL;
    238 	    *ex = p;
    239 	    return regex;
    240 	case '(':
    241 	    RE_SET_MODE(re, RE_SUBREGEX);
    242 	    p++;
    243 	    re->p.sub = newRegex0(&p, igncase, NULL, msg, level + 1);
    244 	    if (re->p.sub == NULL)
    245 		return NULL;
    246 	    re++;
    247 	    break;
    248 	case ')':
    249 	    if (level == 0) {
    250 		if (msg)
    251 		    *msg = "Too many ')'";
    252 		return NULL;
    253 	    }
    254 	    RE_SET_MODE(re, RE_ENDMARK);
    255 	    re++;
    256 	    *ex = p;
    257 	    return regex;
    258 	case '\\':
    259 	    p++;
    260 	default:
    261 	    *(st_ptr) = set_longchar(p);
    262 	    p += get_mclen(p) - 1;
    263 	    re->p.pattern = st_ptr;
    264 	    st_ptr++;
    265 	    RE_SET_MODE(re, RE_NORMAL);
    266 	    if (igncase)
    267 		re->mode |= RE_IGNCASE;
    268 	    re++;
    269 	}
    270 	if (st_ptr >= &regex->storage[STORAGE_MAX] ||
    271 	    re >= &regex->re[REGEX_MAX]) {
    272 	    if (msg)
    273 		*msg = "Regular expression too long";
    274 	    return NULL;
    275 	}
    276     }
    277     RE_SET_MODE(re, RE_ENDMARK);
    278     if (msg)
    279 	*msg = NULL;
    280     *ex = p;
    281     return regex;
    282 }
    283 
    284 Regex *
    285 newRegex(char *ex, int igncase, Regex *regex, char **msg)
    286 {
    287     return newRegex0(&ex, igncase, regex, msg, 0);
    288 }
    289 
    290 /* 
    291  * regexMatch: match regular expression
    292  */
    293 int
    294 regexMatch(char *str, int len, int firstp)
    295 {
    296     return RegexMatch(&DefaultRegex, str, len, firstp);
    297 }
    298 
    299 int
    300 RegexMatch(Regex *re, char *str, int len, int firstp)
    301 {
    302     char *p, *ep;
    303     char *lpos;
    304     Regex *r;
    305 
    306     if (str == NULL)
    307 	return 0;
    308     if (len < 0)
    309 	len = strlen(str);
    310     re->position = NULL;
    311     ep = str + len;
    312     for (p = str; p <= ep; p++) {
    313 	lpos = NULL;
    314 	re->lposition = NULL;
    315 	for (r = re; r != NULL; r = r->alt_regex) {
    316 	    switch (regmatch(r->re, p, ep, firstp && (p == str), &lpos)) {
    317 	    case 1:		/* matched */
    318 		re->position = p;
    319 		if (re->lposition == NULL || re->lposition < lpos)
    320 		    re->lposition = lpos;
    321 		break;
    322 	    case -1:		/* error */
    323 		re->position = NULL;
    324 		return -1;
    325 	    }
    326 	}
    327 	if (re->lposition != NULL) {
    328 	    /* matched */
    329 	    return 1;
    330 	}
    331 	p += get_mclen(p) - 1;
    332     }
    333     return 0;
    334 }
    335 
    336 /* 
    337  * matchedPosition: last matched position
    338  */
    339 void
    340 MatchedPosition(Regex *re, char **first, char **last)
    341 {
    342     *first = re->position;
    343     *last = re->lposition;
    344 }
    345 
    346 void
    347 matchedPosition(char **first, char **last)
    348 {
    349     *first = DefaultRegex.position;
    350     *last = DefaultRegex.lposition;
    351 }
    352 
    353 /* 
    354  * Internal routines
    355  */
    356 
    /*
     * State of one regmatch_iter() generator.  regmatch_iter() returns from
     * the middle of its loops and is re-entered later, so everything that has
     * to survive between two calls is kept here instead of in locals.
     */
    357 struct MatchingContext1 {
    358     int label;			/* resume point written by YIELD; 0 = start from the top */
    359     regexchar *re;		/* pattern element currently being matched */
    360     char *lastpos;		/* end of the match most recently reported */
    361     char *str;			/* current position in the subject text */
    362     int iter_limit;		/* RE_ITER_LIMIT for RE_ANYTIME elements, 1 for RE_OPT */
    363     int n_any;			/* bytes consumed by the repeated element; -1 before the first pass */
    364     int firstp;			/* non-zero while '^' may still match at str */
    365     char *end_p;		/* end of the subject text */
    366     Regex *sub_regex;		/* group alternative currently being tried */
    367     struct MatchingContext1 *sub_ctx;	/* generator state for a non-repeated group */
    368     struct MatchingContext2 *ctx2;	/* generator state for a repeated group */
    369 };
    370 
    /*
     * State of one regmatch_sub_anytime() generator (repetition of a
     * parenthesised group), kept between calls for the same reason as
     * struct MatchingContext1.
     */
    371 struct MatchingContext2 {
    372     int label;			/* resume point written by YIELD; 0 = start from the top */
    373     Regex *regex;		/* group alternative currently being tried */
    374     char *lastpos;		/* end of the match most recently reported */
    375     struct MatchingContext1 *ctx;	/* generator state for one occurrence of the group */
    376     struct MatchingContext2 *ctx2;	/* generator state for the further occurrences */
    377     char *str;			/* where this occurrence of the group starts */
    378     int n_any;			/* length in bytes of the occurrence just matched */
    379     int firstp;			/* non-zero while '^' may still match at str */
    380 };
    381 
    382 
    /*
     * YIELD(retval, context, lnum): store lnum in context->label, return
     * retval, and define the label "label<lnum>" right behind the return.
     * A function using it starts with a switch on context->label that jumps
     * to that label, so the next call carries on after the YIELD.
     * The expansion is several statements: only use it inside braces.
     */
    383 #define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum:
    384 
    /* Declared early because regmatch_sub_anytime() below calls it. */
    385 static int regmatch_iter(struct MatchingContext1 *,
    386 			 regexchar *, char *, char *, int);
    387 
    /*
     * regmatch_sub_anytime: generator for a repeated group.
     *
     * Matches at least one and at most iter_limit consecutive non-empty
     * occurrences of the group `regex' (any of its alternatives) starting at
     * str, followed by the rest of the pattern pat2.  Each call that finds a
     * candidate returns 1 with c->lastpos set to the end of that candidate;
     * 0 is returned once there are no more candidates.
     *
     * The caller sets c->label to 0 before the first call and then keeps
     * calling with the same c.  str and firstp are captured in c on the first
     * call; regex, pat2, end_p and iter_limit are read from the arguments on
     * every call, so they have to be the same each time.
     */
    388 static int
    389 regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
    390 		     regexchar * pat2,
    391 		     char *str, char *end_p, int iter_limit, int firstp)
    392 {
        /* Re-entry: jump back to the YIELD that produced the last result. */
    393     switch (c->label) {
    394     case 1:
    395 	goto label1;
    396     case 2:
    397 	goto label2;
    398     case 3:
    399 	goto label3;
    400     }
    401     c->ctx = GC_malloc(sizeof(struct MatchingContext1));
    402     c->ctx2 = GC_malloc(sizeof(struct MatchingContext2));
    403     c->ctx->label = 0;
    404     c->regex = regex;
    405     c->n_any = 0;
    406     c->str = str;
    407     c->firstp = firstp;
        /* One pass of this loop per alternative of the group. */
    408     for (;;) {
    409 	c->ctx->label = 0;
    	/* Enumerate every way the current alternative matches at c->str. */
    410 	while (regmatch_iter(c->ctx, c->regex->re, c->str, end_p, c->firstp)) {
    411 	    c->n_any = c->ctx->lastpos - c->str;
    	    /* Skip empty occurrences: repeating them would make no progress. */
    412 	    if (c->n_any <= 0)
    413 		continue;
    414 	    c->firstp = 0;
    	    /* Exactly one more occurrence, then the rest of the pattern. */
    415 	    if (RE_MODE(pat2) == RE_ENDMARK) {
    416 		c->lastpos = c->str + c->n_any;
    417 		YIELD(1, c, 1);
    418 	    }
    419 	    else if (regmatch(pat2, c->str + c->n_any, end_p,
    420 			      c->firstp, &c->lastpos) == 1) {
    421 		YIELD(1, c, 2);
    422 	    }
    	    /* No repetitions left: do not recurse. */
    423 	    if (iter_limit == 1)
    424 		continue;
    	    /* Further occurrences: recurse from the end of this one. */
    425 	    c->ctx2->label = 0;
    426 	    while (regmatch_sub_anytime(c->ctx2, regex, pat2,
    427 					c->str + c->n_any, end_p,
    428 					iter_limit - 1, c->firstp)) {
    429 
    430 		c->lastpos = c->ctx2->lastpos;
    431 		YIELD(1, c, 3);
    432 	    }
    433 	}
    434 	if (c->regex->alt_regex == NULL)
    435 	    break;
    436 	c->regex = c->regex->alt_regex;
    437     }
    438     return 0;
    439 }
    440 
    /*
     * regmatch_iter: generator over the matches of an element sequence.
     *
     * Tries to match the elements starting at re (up to the RE_ENDMARK
     * element) against the text at str, which ends at end_p.  Each call that
     * finds a candidate returns 1 with c->lastpos set to the end of the
     * matched text; 0 is returned once there are no more candidates.
     *
     * The caller sets c->label to 0 before the first call and then keeps
     * calling with the same c; on those later calls the function jumps back
     * to the YIELD it returned from and works from the state saved in c.
     * firstp is non-zero when '^' may match at str.
     */
    441 static int
    442 regmatch_iter(struct MatchingContext1 *c,
    443 	      regexchar * re, char *str, char *end_p, int firstp)
    444 {
        /* Re-entry: jump back to the YIELD that produced the last result. */
    445     switch (c->label) {
    446     case 1:
    447 	goto label1;
    448     case 2:
    449 	goto label2;
    450     case 3:
    451 	goto label3;
    452     case 4:
    453 	goto label4;
    454     case 5:
    455 	goto label5;
    456     case 6:
    457 	goto label6;
    458     case 7:
    459 	goto label7;
    460     }
        /* An empty element sequence produces no match at all. */
    461     if (RE_MODE(re) == RE_ENDMARK)
    462 	return 0;
    463     c->re = re;
    464     c->firstp = firstp;
    465     c->str = str;
    466     c->end_p = end_p;
    467     c->sub_ctx = NULL;
    468     c->lastpos = NULL;
    469     while (RE_MODE(c->re) != RE_ENDMARK) {
    	/*
    	 * pat*, pat+ (compiled as pat pat*) and pat?: this branch handles the
    	 * element together with everything after it (via regmatch on
    	 * c->re + 1) and always ends with a return.
    	 */
    470 	if (c->re->mode & (RE_ANYTIME | RE_OPT)) {
    471 	    if (c->re->mode & RE_ANYTIME)
    472 		c->iter_limit = RE_ITER_LIMIT;
    473 	    else
    474 		c->iter_limit = 1;
    	    /* -1: the first pass only moves n_any to 0 (zero repetitions). */
    475 	    c->n_any = -1;
    476 	    while (c->n_any < c->iter_limit) {
    477 		if (c->str + c->n_any >= c->end_p) {
    478 		    return 0;
    479 		}
    480 		if (c->n_any >= 0) {
    481 		    if (RE_MODE(c->re) == RE_SUBREGEX) {
    			/*
    			 * Repeated group: regmatch_sub_anytime() already
    			 * matches the rest of the pattern, so each of its
    			 * results is passed on as is.
    			 */
    482 			c->ctx2 = GC_malloc(sizeof(struct MatchingContext2));
    483 			c->ctx2->label = 0;
    484 			while (regmatch_sub_anytime(c->ctx2,
    485 						    c->re->p.sub,
    486 						    c->re + 1,
    487 						    c->str + c->n_any,
    488 						    c->end_p,
    489 						    c->iter_limit,
    490 						    c->firstp)) {
    491 			    c->n_any = c->ctx2->lastpos - c->str;
    492 			    c->lastpos = c->ctx2->lastpos;
    493 			    YIELD(1, c, 1);
    494 			}
    495 			return 0;
    496 		    }
    497 		    else {
    			/* Repeated single character: take one more, or stop. */
    498 			longchar k;
    499 			k = set_longchar(c->str + c->n_any);
    500 			if (regmatch1(c->re, &k)) {
    501 			    c->n_any += get_mclen(c->str + c->n_any);
    502 			}
    503 			else {
    504 			    return 0;
    505 			}
    506 			c->firstp = 0;
    507 		    }
    508 		}
    509 		else
    510 		    c->n_any++;
    		/* Try the rest of the pattern after n_any repeated bytes. */
    511 		if (RE_MODE(c->re + 1) == RE_ENDMARK) {
    512 		    c->lastpos = c->str + c->n_any;
    513 		    YIELD(1, c, 2);
    514 		}
    515 		else if (regmatch(c->re + 1, c->str + c->n_any, c->end_p,
    516 				  c->firstp, &c->lastpos) == 1) {
    517 		    YIELD(1, c, 3);
    518 		}
    519 	    }
    520 	    return 0;
    521 	}
    522 	/* regexp other than pat*, pat+ and pat? */
    523 	switch (RE_MODE(c->re)) {
    524 	case RE_BEGIN:
    525 	    if (!c->firstp)
    526 		return 0;
    527 	    c->re++;
    528 	    break;
    529 	case RE_END:
    	    /* '$': report a match ending here; if the caller resumes,
    	     * matching goes on with the next element. */
    530 	    if (c->str >= c->end_p) {
    531 		c->lastpos = c->str;
    532 		c->re++;
    533 		YIELD(1, c, 4);
    534 	    }
    535 	    else {
    536 		c->lastpos = NULL;
    537 		return 0;
    538 	    }
    539 	    break;
    540 	case RE_SUBREGEX:
    	    /*
    	     * Non-repeated group: for every alternative and every way it
    	     * matches at c->str, match the rest of the pattern behind it.
    	     * Like the repetition branch this one always ends with a return.
    	     */
    541 	    if (c->sub_ctx == NULL) {
    542 		c->sub_ctx = GC_malloc(sizeof(struct MatchingContext1));
    543 	    }
    544 	    c->sub_regex = c->re->p.sub;
    545 	    for (;;) {
    546 		c->sub_ctx->label = 0;
    547 		while (regmatch_iter(c->sub_ctx, c->sub_regex->re,
    548 				     c->str, c->end_p, c->firstp)) {
    549 		    if (c->sub_ctx->lastpos != c->str)
    550 			c->firstp = 0;
    551 		    if (RE_MODE(c->re + 1) == RE_ENDMARK) {
    552 			c->lastpos = c->sub_ctx->lastpos;
    553 			YIELD(1, c, 5);
    554 		    }
    555 		    else if (regmatch(c->re + 1, c->sub_ctx->lastpos, c->end_p,
    556 				      c->firstp, &c->lastpos) == 1) {
    557 			YIELD(1, c, 6);
    558 		    }
    559 		}
    560 		if (c->sub_regex->alt_regex == NULL)
    561 		    break;
    562 		c->sub_regex = c->sub_regex->alt_regex;
    563 	    }
    564 	    return 0;
    565 	default:
    	    /* Single character element.  c->str is advanced before the
    	     * character is tested. */
    566 	    {
    567 		longchar k;
    568 		k = set_longchar(c->str);
    569 		c->str += get_mclen(c->str);
    570 		if (!regmatch1(c->re, &k))
    571 		    return 0;
    572 	    }
    573 	    c->re++;
    574 	    c->firstp = 0;
    575 	}
    	/* The element consumed text beyond the end of the subject. */
    576 	if (c->str > c->end_p) {
    577 	    return 0;
    578 	}
    579     }
        /* Every element matched: report the match once, then finish. */
    580     c->lastpos = c->str;
    581 #ifdef REGEX_DEBUG
    582     if (verbose)
    583 	printf("Succeed: %s %d\n", c->str, c->lastpos - c->str);
    584 #endif
    585     YIELD(1, c, 7);
    586     return 0;
    587 }
    588 
    589 static int
    590 regmatch(regexchar * re, char *str, char *end_p, int firstp, char **lastpos)
    591 {
    592     struct MatchingContext1 contx;
    593 
    594     *lastpos = NULL;
    595 
    596     contx.label = 0;
    597     while (regmatch_iter(&contx, re, str, end_p, firstp)) {
    598 #ifdef REGEX_DEBUG
    599 	char *p;
    600 	if (verbose) {
    601 	    printf("regmatch: matched <");
    602 	    for (p = str; p < contx.lastpos; p++)
    603 		putchar(*p);
    604 	    printf(">\n");
    605 	}
    606 #endif
    607 	if (*lastpos == NULL || *lastpos < contx.lastpos)
    608 	    *lastpos = contx.lastpos;
    609     }
    610     if (*lastpos == NULL)
    611 	return 0;
    612     return 1;
    613 }
    614 
    615 
    616 static int
    617 regmatch1(regexchar * re, longchar * c)
    618 {
    619     int ans;
    620 
    621 #ifdef USE_M17N
    622     if (c->type == RE_TYPE_SYMBOL)
    623 	return 0;
    624 #endif
    625     switch (RE_MODE(re)) {
    626     case RE_ANY:
    627 #ifdef REGEX_DEBUG
    628 	if (verbose)
    629 	    printf("%s vs any. -> 1\n", lc2c(c, 1));
    630 #endif				/* REGEX_DEBUG */
    631 	return 1;
    632     case RE_NORMAL:
    633 	ans = match_longchar(re->p.pattern, c, re->mode & RE_IGNCASE);
    634 #ifdef REGEX_DEBUG
    635 	if (verbose)
    636 	    printf("RE=%s vs %s -> %d\n", lc2c(re->p.pattern, 1), lc2c(c, 1),
    637 		   ans);
    638 #endif				/* REGEX_DEBUG */
    639 	return ans;
    640     case RE_WHICH:
    641 	return matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE);
    642     case RE_EXCEPT:
    643 	return !matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE);
    644     }
    645     return 0;
    646 }
    647 
    648 static int
    649 matchWhich(longchar * pattern, longchar * c, int igncase)
    650 {
    651     longchar *p = pattern;
    652     int ans = 0;
    653 
    654 #ifdef REGEX_DEBUG
    655     if (verbose)
    656 	printf("RE pattern = %s char=%s", lc2c(pattern, 10000), lc2c(c, 1));
    657 #endif				/* REGEX_DEBUG */
    658     while (p->type != RE_TYPE_END) {
    659 	if ((p + 1)->type == RE_WHICH_RANGE && (p + 2)->type != RE_TYPE_END) {
    660 	    if (match_range_longchar(p, p + 2, c, igncase)) {
    661 		ans = 1;
    662 		break;
    663 	    }
    664 	    p += 3;
    665 	}
    666 	else {
    667 	    if (match_longchar(p, c, igncase)) {
    668 		ans = 1;
    669 		break;
    670 	    }
    671 	    p++;
    672 	}
    673     }
    674 #ifdef REGEX_DEBUG
    675     if (verbose)
    676 	printf(" -> %d\n", ans);
    677 #endif				/* REGEX_DEBUG */
    678     return ans;
    679 }
    680 
    681 static int
    682 match_longchar(longchar * a, longchar * b, int ignore)
    683 {
    684 #ifdef USE_M17N
    685     if (a->type != b->type)
    686 	return 0;
    687     if (a->type == RE_TYPE_WCHAR_T) {
    688 #ifdef USE_UNICODE
    689 	if (ignore) {
    690 	    wc_uint32 ua = wc_any_to_ucs(a->wch), ub = wc_any_to_ucs(b->wch);
    691 	    return (ua == ub ||
    692 		    ua == wc_ucs_tolower(ub) ||
    693 	            ua == wc_ucs_toupper(ub) ||
    694 		    ua == wc_ucs_totitle(ub));
    695 	}
    696 #endif
    697 	return (a->wch.ccs == b->wch.ccs) && (a->wch.code == b->wch.code);
    698     }
    699 #endif
    700     if (ignore && IS_ALPHA(b->ch))
    701 	return (a->ch == TOLOWER(b->ch) || a->ch == TOUPPER(b->ch));
    702     else
    703 	return a->ch == b->ch;
    704 }
    705 
    706 static int
    707 match_range_longchar(longchar * a, longchar * b, longchar * c, int ignore)
    708 {
    709 #ifdef USE_M17N
    710     if (a->type != b->type || a->type != c->type)
    711 	return 0;
    712     if (a->type == RE_TYPE_WCHAR_T) {
    713 	if (a->wch.ccs != c->wch.ccs || c->wch.ccs != b->wch.ccs)
    714 	    return 0;
    715 #ifdef USE_UNICODE
    716 	if (ignore) {
    717 	    wc_uint32 uc = wc_any_to_ucs(c->wch);
    718 
    719 	    if (wc_is_ucs_alpha(uc)) {
    720 	    	wc_uint32 ua = wc_any_to_ucs(a->wch);
    721 	    	wc_uint32 ub = wc_any_to_ucs(b->wch);
    722 		wc_uint32 upper = wc_ucs_toupper(uc);
    723 		wc_uint32 lower = wc_ucs_tolower(uc);
    724 		wc_uint32 title = wc_ucs_totitle(uc);
    725 
    726 		return ((ua <= upper && upper <= ub) ||
    727 			(ua <= lower && lower <= ub) ||
    728 			(ua <= title && title <= ub));
    729 	    }
    730 	}
    731 #endif
    732 	return (a->wch.code <= c->wch.code && c->wch.code <= b->wch.code);
    733     }
    734 #endif
    735     if (ignore && IS_ALPHA(c->ch))
    736 	return ((a->ch <= TOLOWER(c->ch) && TOLOWER(c->ch) <= b->ch) ||
    737 		(a->ch <= TOUPPER(c->ch) && TOUPPER(c->ch) <= b->ch));
    738     else
    739 	return (a->ch <= c->ch && c->ch <= b->ch);
    740 }
    741 
    742 #ifdef REGEX_DEBUG
    743 char *
    744 lc2c(longchar * x, int len)
    745 {
    746     static char y[100];
    747     int i = 0, j = 0;
    748     char *r;
    749 
    750     while (x[j].type != RE_TYPE_END && j < len) {
    751 	if (x[j].type == RE_WHICH_RANGE)
    752 	    y[i++] = '-';
    753 #ifdef USE_M17N
    754 	else if (x[j].type == RE_TYPE_WCHAR_T) {
    755 	    char buf[20];
    756 	    sprintf(buf, "[%x-%x]", x[j].wch.ccs, x[j].wch.code);
    757 	    strcpy(&y[i], buf);
    758 	    i += strlen(buf);
    759 	}
    760 #endif
    761 	else
    762 	    y[i++] = x[j].ch;
    763 	j++;
    764     }
    765     y[i] = '\0';
    766     r = GC_malloc_atomic(i + 1);
    767     strcpy(r, y);
    768     return r;
    769 }
    770 
    771 void
    772 debugre(regexchar * re)
    773 {
    774     for (; RE_MODE(re) != RE_ENDMARK; re++) {
    775 	switch (RE_MODE(re)) {
    776 	case RE_BEGIN:
    777 	    printf("Begin ");
    778 	    continue;
    779 	case RE_END:
    780 	    printf("End ");
    781 	    continue;
    782 	}
    783 	if (re->mode & RE_ANYTIME)
    784 	    printf("Anytime-");
    785 	if (re->mode & RE_OPT)
    786 	    printf("Opt-");
    787 
    788 	switch (RE_MODE(re)) {
    789 	case RE_ANY:
    790 	    printf("Any ");
    791 	    break;
    792 	case RE_NORMAL:
    793 	    printf("Match-to'%c' ", *re->p.pattern);
    794 	    break;
    795 	case RE_WHICH:
    796 	    printf("One-of\"%s\" ", lc2c(re->p.pattern, 10000));
    797 	    break;
    798 	case RE_EXCEPT:
    799 	    printf("Other-than\"%s\" ", lc2c(re->p.pattern, 10000));
    800 	    break;
    801 	case RE_SUBREGEX:
    802 	    {
    803 		Regex *r = re->p.sub;
    804 		printf("(");
    805 		while (r) {
    806 		    debugre(r->re);
    807 		    if (r->alt_regex)
    808 			printf(" | ");
    809 		    r = r->alt_regex;
    810 		}
    811 		printf(")");
    812 		break;
    813 	    }
    814 	default:
    815 	    printf("Unknown ");
    816 	}
    817     }
    818 }
    819 
    820 #endif				/* REGEX_DEBUG */
    821 
    822 #ifdef REGEXTEST
    823 int
    824 main(int argc, char **argv)
    825 {
    826     char buf[128], buf2[128];
    827     char *msg;
    828     Regex *re;
    829     char *fpos, *epos;
    830     FILE *f = stdin;
    831     int i = 1;
    832 
    833 #ifdef USE_M17N
    834     wtf_init(WC_CES_EUC_JP, WC_CES_EUC_JP);
    835 #endif
    836 #ifdef REGEX_DEBUG
    837     for (i = 1; i < argc; i++) {
    838 	if (strcmp(argv[i], "-v") == 0)
    839 	    verbose = 1;
    840 	else
    841 	    break;
    842     }
    843 #endif
    844 
    845     if (argc > i)
    846 	f = fopen(argv[i], "r");
    847     if (f == NULL) {
    848 	fprintf(stderr, "Can't open %s\n", argv[i]);
    849 	exit(1);
    850     }
    851     while (fscanf(f, "%s%s", buf, buf2) == 2) {
    852 	re = newRegex(buf, 0, NULL, &msg);
    853 	if (re == NULL) {
    854 	    printf("Error on regexp /%s/: %s\n", buf, msg);
    855 	    exit(1);
    856 	}
    857 	if (RegexMatch(re, buf2, -1, 1)) {
    858 	    printf("/%s/\t\"%s\"\t\"", buf, buf2);
    859 	    MatchedPosition(re, &fpos, &epos);
    860 	    while (fpos < epos)
    861 		putchar(*(fpos++));
    862 	    putchar('"');
    863 	}
    864 	else
    865 	    printf("/%s/\t\"%s\"\tno_match", buf, buf2);
    866 	putchar('\n');
    867     }
    868     /* notreatched */
    869     return 0;
    870 }
    871 #endif