w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

mk_gb18030_ucs_map.pl (2768B)


      1 
      # mk_gb18030_ucs_map.pl -- build GBK-extension / GB18030 <-> UCS range tables.
#
# Each row of the DATA section at the bottom is "name  mapfile  description".
# For every row the mapping file is read as "<code> <ucs>" hex pairs
# ('#' starts a comment).  The "gbk" row is only remembered as a baseline;
# for every other row a C source fragment "<name>_ucs.map" is written that
# contains three wc_map3 tables of { start, end, value } triples:
#
#   gbk_ext_ucs_map   two-byte code runs -> first UCS value of the run
#                     (only codes whose UCS value is NOT in the gbk baseline)
#   ucs_gbk_ext_map   the same runs, keyed by UCS value
#   ucs_<name>_map    runs of BMP code points that have no two-byte mapping,
#                     each with the running linear index of its first member
#                     (presumably the GB18030 four-byte index -- confirm
#                     against the code that consumes the table)
#
# A "run" is a maximal stretch of consecutive keys; all ends are inclusive.

use strict;
use warnings;

# Emit one wc_map3 table: a "#define N_<table> <count>" line followed by the
# array itself, one "{ start, end, value }" row per key of %$value_of in
# ascending numeric order.  $storage is the text placed before "wc_map3"
# ("" or "static ").
sub print_map3 {
    my ($fh, $storage, $table, $value_of, $end_of) = @_;

    my @starts = sort { $a <=> $b } keys %$value_of;
    my $count  = scalar @starts;

    print $fh <<EOF;

#define N_$table $count

${storage}wc_map3 ${table}[ N_$table ] = {
EOF
    for my $start (@starts) {
        printf $fh "  { 0x%.4X, 0x%.4X, 0x%.4X },\n",
            $start, $end_of->{$start}, $value_of->{$start};
    }
    print $fh <<EOF;
};
EOF
    return;
}

# ---- read the job list from DATA -------------------------------------------

my @NAME;
my (%MAP, %CODE);
while (my $row = <DATA>) {
    chomp $row;                 # chomp, not chop: the last row may lack "\n"
    $row =~ s/\s*$//;
    my ($n, $m, $c) = split(" ", $row, 3);
    next unless defined $c;     # need all three fields
    push(@NAME, $n);
    $MAP{$n}  = $m;
    $CODE{$n} = $c;
}

# ---- process each mapping file ---------------------------------------------

my %from_ucs0;                  # UCS -> code, taken from the "gbk" row

foreach my $name (@NAME) {
    my $code = $CODE{$name};
    my $map  = $MAP{$name};

    print "$name\t$map\t$code\n";

    my %to_ucs;                 # code -> UCS, excluding UCS values known to gbk
    my %from_ucs;               # UCS  -> code, every pair in this file

    open(my $map_fh, '<', $map)
        or die "$0: cannot open mapping file '$map': $!\n";
    while (my $line = <$map_fh>) {
        next if $line =~ /^#/;
        $line =~ s/#.*//;
        my ($i, $u) = split(" ", $line);
        next unless defined $u; # skip blank and single-field lines
        $i = hex($i);
        $u = hex($u);
        $from_ucs{$u} = $i;
        $to_ucs{$i} = $u unless $from_ucs0{$u};
    }
    close($map_fh);

    if ($name eq "gbk") {
        # gbk only provides the baseline; no output file is produced for it.
        %from_ucs0 = %from_ucs;
        next;
    }

    # Per-name accumulators (declared here so nothing leaks between rows).
    my (%to_ucs2, %from_ucs2);  # run start -> value, keyed by code / by UCS
    my (%gbk_end, %ucs2_end);   # run start -> inclusive run end
    my (%from_ucs4, %ucs4_end);

    # -- two-byte area: collapse consecutive UCS values into runs ------------
    my $in_run = 0;
    my ($run_code, $run_ucs);   # first code / UCS value of the open run
    my ($prev_code, $prev_ucs); # previous code visited / previous UCS mapped
    for my $ub (0x81 .. 0xFE) {
        for my $lb (0x40 .. 0x7E, 0x80 .. 0xFE) {
            my $i = ($ub << 8) + $lb;
            my $u = $to_ucs{$i};
            if ($u) {
                if ($in_run && $u != $prev_ucs + 1) {
                    # UCS sequence broke: close the open run.
                    $ucs2_end{$run_ucs} = $prev_ucs;
                    $gbk_end{$run_code} = $prev_code;
                    $in_run = 0;
                }
                if (!$in_run) {
                    $to_ucs2{$i}   = $u;
                    $from_ucs2{$u} = $i;
                    $run_code = $i;
                    $run_ucs  = $u;
                }
                $in_run   = 1;
                $prev_ucs = $u;
            }
            else {
                if ($in_run) {
                    $ucs2_end{$run_ucs} = $prev_ucs;
                    $gbk_end{$run_code} = $prev_code;
                }
                $in_run = 0;
            }
            $prev_code = $i;
        }
    }
    if ($in_run) {
        $ucs2_end{$run_ucs} = $prev_ucs;
        $gbk_end{$run_code} = 0xFEFE;
    }

    # -- BMP code points without a two-byte code: linear index runs ----------
    my $index = 0;              # running count of unmapped code points so far
    my $run_start;
    $in_run = 0;
    for my $u (0x0080 .. 0xD7FF, 0xE000 .. 0xFFFF) {
        if (!$from_ucs{$u}) {
            if (!$in_run) {
                $from_ucs4{$u} = $index;
                $run_start = $u;
            }
            $index++;
            $in_run = 1;
        }
        else {
            # $u itself is mapped, so the run ended one code point earlier.
            $ucs4_end{$run_start} = $u - 1 if $in_run;
            $in_run = 0;
        }
        if ($u == 0xD7FF) {
            # Force a break before the surrogate gap.  If a run is still open
            # here, 0xD7FF belongs to it ($index was already advanced for it),
            # so the inclusive end is $u -- the original stored $u - 1, which
            # left 0xD7FF outside every range.
            $ucs4_end{$run_start} = $u if $in_run;
            $in_run = 0;
        }
    }
    $ucs4_end{$run_start} = 0xFFFF if $in_run;

    # -- write the C fragment -------------------------------------------------
    my $out = "${name}_ucs.map";
    open(my $out_fh, '>', $out)
        or die "$0: cannot create '$out': $!\n";

    # print OUT <<EOF;
    # /*
    #   These conversion tables between $code and
    #   Unicode were made from
    #
    #     ftp://ftp.unicode.org/Public/MAPPINGS/$map.
    # */
    print $out_fh <<EOF;
/* $code */
EOF

    print_map3($out_fh, "",        "gbk_ext_ucs_map",  \%to_ucs2,   \%gbk_end);
    print_map3($out_fh, "static ", "ucs_gbk_ext_map",  \%from_ucs2, \%ucs2_end);
    print_map3($out_fh, "static ", "ucs_${name}_map",  \%from_ucs4, \%ucs4_end);

    # Buffered write errors only surface at close.
    close($out_fh)
        or die "$0: error writing '$out': $!\n";
}

__END__
gbk		VENDORS/MICSFT/WINDOWS/CP936.TXT		GBK (Chinese)
gb18030		GBK.TXT		GB18030 (Chinese)