w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

mk_gb_ucs_map.pl (1591B)


      1 
      2 @NAME = ();
      3 while(<DATA>) {
      4   chop;
      5   s/\s*$//;
      6   (($n, $m, $c) = split(" ", $_, 3)) >= 3 || next;
      7   push(@NAME, $n);
      8   $MAP{$n} = $m;
      9   $CODE{$n} = $c;
     10 }
     11 
     12 foreach $name (@NAME) {
     13 
     14 $code = $CODE{$name};
     15 $map = $MAP{$name};
     16 
     17 print "$name\t$map\t$code\n";
     18 
     19 @to_ucs = ();
     20 %from_ucs = ();
     21 open(MAP, "< $map");
     22 open(OUT, "> ${name}_ucs.map");
     23 while(<MAP>) {
     24   /^#/ && next;
     25   s/#.*//;
     26   (($i, $u) = split(" ")) || next;
     27   $i = hex($i);
     28   $u = hex($u);
     29   $to_ucs[$i] = $u;
     30   if ($u > 0) {
     31     $from_ucs{$u} = $i;
     32   }
     33 }
     34 
     35 # compatibility with GBK(CP936), GB18030
     36 delete $from_ucs{$to_ucs[0x2124]};
     37 delete $from_ucs{$to_ucs[0x212A]};
     38 $from_ucs{0x00B7} = 0x2124;
     39 $from_ucs{0x2014} = 0x212A;
     40 $to_ucs[0x2124] = 0x00B7;
     41 $to_ucs[0x212A] = 0x2014;
     42 
     43 # print OUT <<EOF;
     44 # /*
     45 #   These conversion tables between $code and
     46 #   Unicode were made from
     47 # 
     48 #     ftp://ftp.unicode.org/Public/MAPPINGS/$map.
     49 # */
     50 print OUT <<EOF;
     51 /* $code */
     52 
     53 static wc_uint16 ${name}_ucs_map[ 0x5E * 0x5E ] = {
     54 EOF
     55 
     56 for $i (0x21 .. 0x7E) {
     57 for $j (0x21 .. 0x7E) {
     58   $_ = $i * 0x100 + $j;
     59   $u = $to_ucs[$_];
     60   if ($u) {
     61     printf OUT " 0x%.4X,", $u;
     62   } else {
     63     print OUT " 0,\t";
     64   }
     65   printf OUT "\t/* 0x%.4X */\n", $_;
     66 }
     67 }
     68 
     69 @ucs = sort { $a <=> $b } keys %from_ucs;
     70 $nucs = @ucs + 0;
     71 
     72 print OUT <<EOF;
     73 };
     74 
     75 #define N_ucs_${name}_map $nucs
     76 
     77 static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
     78 EOF
     79 for(@ucs) {
     80   printf OUT "  { 0x%.4X, 0x%.4X },\n", $_, $from_ucs{$_};
     81 }
     82 
     83 print OUT <<EOF;
     84 };
     85 EOF
     86 
     87 close(MAP);
     88 }
     89 
     90 __END__
     91 gb2312		EASTASIA/GB/GB2312.TXT		GB 2312 (Chinese)
     92 gb12345		EASTASIA/GB/GB12345.TXT		GB 12345 (Chinese)