olefs

command line tools to extract data from OLE documents like doc, ppt, xls, msg
git clone https://logand.com/git/olefs.git/
Log | Files | Refs

cfb.c (14517B)


      1 // TODO version 4 with bigger sector size
      2 // TODO from pipe without seek
      3 
      4 const char *VERSION =
      5 #include "VERSION"
      6   ;
      7 
      8 #include <stdlib.h>
      9 #include <assert.h>
     10 #include <stdio.h>
     11 #include <string.h>
     12 #include <errno.h>
     13 #include <fcntl.h>
     14 #include <unistd.h>
     15 #include <stdint.h>
     16 
     17 // MS-CFB Compound File Binary File Format
     18 
     19 #define UNUSED_SECTOR 0
     20 #define MAXREGSECT    0xfffffffa
     21 #define DIFSECT       0xfffffffc
     22 #define FATSECT       0xfffffffd
     23 #define ENDOFCHAIN    0xfffffffe
     24 #define FREESECT      0xffffffff
     25 
     26 #define MAXREGSIG     0xfffffffa
     27 #define NOSTREAM      0xffffffff
     28 
     29 #define ENTRY_UNKNOWN 0
     30 #define ENTRY_STORAGE 1
     31 #define ENTRY_STREAM  2
     32 #define ENTRY_ROOT    5
     33 
     34 typedef uint8_t byte;
     35 typedef uint16_t ushort;
     36 typedef uint16_t wchar;
     37 typedef uint32_t dword;
     38 typedef uint64_t filetime;
     39 typedef uint64_t ulonglong;
     40 
     41 struct entry {
     42   wchar name[32];
     43   ushort name_length;
     44   byte object_type;
     45   byte color_flag;
     46   dword left_sibling_id;
     47   dword right_sibling_id;
     48   dword child_id;
     49   byte clsid[16];
     50   dword state_bits;
     51   filetime creation_time;
     52   filetime modified_time;
     53   dword starting_sector_location;
     54   ulonglong stream_size;
     55 };
     56 
     57 struct chain {
     58   dword location;
     59   struct chain *next;
     60 };
     61 
     62 struct header {
     63   byte signature[8];
     64   byte clsid[16];
     65   ushort minor_version;
     66   ushort major_version;
     67   ushort byte_order;
     68   ushort sector_shift;
     69   ushort mini_sector_shift;
     70   byte reserved[6];
     71   dword number_of_directory_sectors;
     72   dword number_of_fat_sectors;
     73   dword first_directory_sector_location;
     74   dword transaction_signature_number;
     75   dword mini_stream_cutoff_size;
     76   dword first_mini_fat_sector_location;
     77   dword number_of_mini_fat_sectors;
     78   dword first_difat_sector_location;
     79   dword number_of_difat_sectors;
     80 };
     81 
     82 struct cfb_file {
     83   FILE *stream;
     84   //static iconv_t conv;
     85   struct header header;
     86   dword difat_length;
     87   dword *difat;
     88   dword fat_length;
     89   dword *fat;
     90   struct chain *directory_chain;
     91   int directories_length;
     92   struct entry *directories;
     93   struct chain *mfat_chain;
     94   int mfat_length;
     95   dword *mfat;
     96 };
     97 
     98 static void seek_sector(FILE *stream, dword location) {
     99   fseek(stream, (1 + location) * 512, SEEK_SET);
    100 }
    101 
    102 static void read_byte(FILE *stream, byte *place, int count) {
    103   size_t n = fread(place, sizeof(byte), count, stream);
    104   assert(n == count);
    105 }
    106 
    107 static void read_wchar(FILE *stream, wchar *place, int count) {
    108   size_t n = fread(place, sizeof(wchar), count, stream);
    109   assert(n == count);
    110 }
    111 
    112 static void read_ushort(FILE *stream, ushort *place, int count) {
    113   size_t n = fread(place, sizeof(ushort), count, stream);
    114   assert(n == count);
    115 }
    116 
    117 static void read_dword(FILE *stream, dword *place, int count) {
    118   size_t n = fread(place, sizeof(dword), count, stream);
    119   assert(n == count);
    120 }
    121 
    122 static void read_filetime(FILE *stream, filetime *place, int count) {
    123   size_t n = fread(place, sizeof(filetime), count, stream);
    124   assert(n == count);
    125 }
    126 
    127 static void read_ulonglong(FILE *stream, ulonglong *place, int count) {
    128   size_t n = fread(place, sizeof(ulonglong), count, stream);
    129   assert(n == count);
    130 }
    131 
    132 static void read_header(FILE *stream, struct header *x) {
    133   read_byte(stream, x->signature, 8);
    134   read_byte(stream, x->clsid, 16);
    135   read_ushort(stream, &x->minor_version, 1);
    136   read_ushort(stream, &x->major_version, 1);
    137   read_ushort(stream, &x->byte_order, 1);
    138   read_ushort(stream, &x->sector_shift, 1);
    139   read_ushort(stream, &x->mini_sector_shift, 1);
    140   read_byte(stream, x->reserved, 6);
    141   read_dword(stream, &x->number_of_directory_sectors, 1);
    142   read_dword(stream, &x->number_of_fat_sectors, 1);
    143   read_dword(stream, &x->first_directory_sector_location, 1);
    144   read_dword(stream, &x->transaction_signature_number, 1);
    145   read_dword(stream, &x->mini_stream_cutoff_size, 1);
    146   read_dword(stream, &x->first_mini_fat_sector_location, 1);
    147   read_dword(stream, &x->number_of_mini_fat_sectors, 1);
    148   read_dword(stream, &x->first_difat_sector_location, 1);
    149   read_dword(stream, &x->number_of_difat_sectors, 1);
    150 }
    151 
    152 static void check_header(struct header *x) {
    153   static const byte expected_signature[8] =
    154     {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
    155   static const byte expected_reserved[6] = {0, 0, 0, 0, 0, 0};
    156   assert(!memcmp(x->signature, expected_signature, 8));
    157   /*   ;;(assert (equalp clsid_null (ole_header.clsid x))) */
    158   assert(0xfffe == x->byte_order);
    159   assert(!memcmp(x->reserved, expected_reserved, 6));
    160   assert(3 == x->major_version);
    161   assert(512 == (1 << x->sector_shift));
    162   assert(64 == (1 << x->mini_sector_shift));
    163   assert(0 == x->number_of_directory_sectors);
    164   /*   ;;(assert (eql 0xfffffffe (first_directory_sector_location x))) */
    165   assert(0 == x->transaction_signature_number);
    166   assert(4096 == x->mini_stream_cutoff_size);
    167   /*   ;;(assert (eql 0xfffffffe (first_mini_fat_sector_location x))) */
    168   if(x->number_of_difat_sectors <= 0)
    169     assert(0xfffffffe == x->first_difat_sector_location);
    170 }
    171 
    172 static void read_entry(FILE *stream, struct entry *entry) {
    173   read_wchar(stream, &entry->name[0], 32);
    174   read_ushort(stream, &entry->name_length, 1);
    175   read_byte(stream, &entry->object_type, 1);
    176   read_byte(stream, &entry->color_flag, 1);
    177   read_dword(stream, &entry->left_sibling_id, 1);
    178   read_dword(stream, &entry->right_sibling_id, 1);
    179   read_dword(stream, &entry->child_id, 1);
    180   read_byte(stream, &entry->clsid[0], 16);
    181   read_dword(stream, &entry->state_bits, 1);
    182   read_filetime(stream, &entry->creation_time, 1);
    183   read_filetime(stream, &entry->modified_time, 1);
    184   read_dword(stream, &entry->starting_sector_location, 1);
    185   read_ulonglong(stream, &entry->stream_size, 1);
    186 };
    187 
    188 static void print_bytes(byte *place, int count) {
    189   int i;
    190   for(i = 0; i < count; i++) {
    191     printf("%s%02x", (0 < i ? ":" : ""), place[i]);
    192   }
    193 }
    194 
    195 static void print_ushort(ushort x) {
    196   printf("%u 0x%04x", x, x);
    197 }
    198 
    199 static void print_dword(dword x) {
    200   printf("%u 0x%08x", x, x);
    201 }
    202 
    203 static void read_difat (FILE *stream, struct cfb_file *x) {
    204   x->difat_length = 109 + ((512 - 4) / 4) * x->header.number_of_difat_sectors;
    205   x->difat = calloc(x->difat_length, sizeof(dword));
    206   read_dword(stream, x->difat, 109);
    207   dword n = x->header.first_difat_sector_location, i = 109, m = 512 / 4 - 1;
    208   for(; n != ENDOFCHAIN; read_dword(stream, &n, 1), i += m) {
    209     seek_sector(stream, n);
    210     read_dword(stream, &x->difat[i], m);
    211   }
    212 }
    213 
    214 static void read_fat (FILE *stream, struct cfb_file *x) {
    215   dword m = 512 / 4;
    216   x->fat_length = m * x->difat_length;
    217   x->fat = calloc(x->fat_length, sizeof(dword));
    218   int i;
    219   for(i = 0; i < x->difat_length; i++) {
    220     dword s = x->difat[i];
    221     if(s != FREESECT) {
    222       seek_sector(stream, s);
    223       read_dword(stream, &x->fat[i * m], m);
    224     }
    225   }
    226 }
    227 
    228 static struct chain *make_chain(dword location, struct chain *next) {
    229   struct chain *x = malloc(sizeof(struct chain));
    230   x->location = location;
    231   x->next = next;
    232   return x;
    233 }
    234 
    235 static int chain_length(struct chain *x) {
    236   int i = 0;
    237   for(; x; x = x->next) i++;
    238   return i;
    239 }
    240 
    241 static struct chain *nth_chain(struct chain *chain, int n) {
    242   if(0 <= n) {
    243     for(; chain; chain = chain->next) {
    244       if(n <= 0)
    245         return chain;
    246       n--;
    247     }
    248   }
    249   return NULL;
    250 }
    251 
    252 static struct chain *sector_chain(dword *fat, dword location) {
    253   struct chain *x = NULL;
    254   switch(location) {
    255   case DIFSECT:
    256   case FATSECT:
    257   case ENDOFCHAIN:
    258   case FREESECT:
    259     break;
    260   default:
    261     assert(0 <= location && location <= MAXREGSECT);
    262     x = make_chain(location, sector_chain(fat, fat[location]));
    263   }
    264   return x;
    265 }
    266 
    267 static void read_directories (FILE *stream, struct cfb_file *x) {
    268   dword m = 512 / 128;
    269   x->directories_length = m * chain_length(x->directory_chain);
    270   x->directories = calloc(x->directories_length, sizeof(struct entry));
    271   int i = 0;
    272   for(struct chain *c = x->directory_chain; c; c = c->next) {
    273     seek_sector(stream, c->location);
    274     int j;
    275     for(j = 0; j < m; j++)
    276       read_entry(stream, &x->directories[i++]);
    277   }
    278 }
    279 
    280 static void read_mfat (FILE *stream, struct cfb_file *x) {
    281   dword m = 512 / 4;
    282   x->mfat_length = m * chain_length(x->mfat_chain);
    283   x->mfat = calloc(x->mfat_length, sizeof(dword));
    284   int i = 0;
    285   for(struct chain *c = x->mfat_chain; c; c = c->next) {
    286     seek_sector(stream, c->location);
    287     read_dword(stream, &x->mfat[i++ * m], m);
    288   }
    289 }
    290 
    291 static void open_cfb_file(char *filename, struct cfb_file *x) {
    292   FILE *stream = fopen(filename, "r");
    293   if(!stream) {
    294     fprintf(stderr, "Unable to open '%s'.\n", filename);
    295     exit(1);
    296   }
    297   x->stream = stream;
    298   read_header(stream, &x->header);
    299   check_header(&x->header);
    300   read_difat(stream, x);
    301   read_fat(stream, x);
    302   x->directory_chain = sector_chain(x->fat, x->header.first_directory_sector_location);
    303   read_directories(stream, x);
    304   x->mfat_chain = sector_chain(x->fat, x->header.first_mini_fat_sector_location);
    305   read_mfat(stream, x);
    306 }
    307 
    308 static void print_cfb_file(struct cfb_file *x) {
    309   printf("signature ");
    310   print_bytes(x->header.signature, 8);
    311   printf("\nclsid ");
    312   print_bytes(x->header.clsid, 16);
    313   printf("\nminor_version ");
    314   print_ushort(x->header.minor_version);
    315   printf("\nmajor_version ");
    316   print_ushort(x->header.major_version);
    317   printf("\nbyte_order ");
    318   print_ushort(x->header.byte_order);
    319   printf("\nsector_shift ");
    320   print_ushort(x->header.sector_shift);
    321   printf("\nmini_sector_shift ");
    322   print_ushort(x->header.mini_sector_shift);
    323   printf("\nreserved ");
    324   print_bytes(x->header.reserved, 6);
    325   printf("\nnumber_of_directory_sectors ");
    326   print_dword(x->header.number_of_directory_sectors);
    327   printf("\nnumber_of_fat_sectors ");
    328   print_dword(x->header.number_of_fat_sectors);
    329   printf("\nfirst_directory_sector_location ");
    330   print_dword(x->header.first_directory_sector_location);
    331   printf("\ntransaction_signature_number ");
    332   print_dword(x->header.transaction_signature_number);
    333   printf("\nmini_stream_cutoff_size ");
    334   print_dword(x->header.mini_stream_cutoff_size);
    335   printf("\nfirst_mini_fat_sector_location ");
    336   print_dword(x->header.first_mini_fat_sector_location);
    337   printf("\nnumber_of_mini_fat_sectors ");
    338   print_dword(x->header.number_of_mini_fat_sectors);
    339   printf("\nfirst_difat_sector_location ");
    340   print_dword(x->header.first_difat_sector_location);
    341   printf("\nnumber_of_difat_sectors ");
    342   print_dword(x->header.number_of_difat_sectors);
    343   //printf("\n");
    344   printf("\ndifat_length ");
    345   print_dword(x->difat_length);
    346   //dword *difat;
    347   printf("\nfat_length ");
    348   print_dword(x->fat_length);
    349   //dword *fat;
    350   //struct chain *directory_chain;
    351   printf("\ndirectories_length %d", x->directories_length);
    352   //struct entry *directories;
    353   //struct chain *mfat_chain;
    354   printf("\nmfat_length %d", x->mfat_length);
    355   //dword *mfat;
    356   printf("\n");
    357 }
    358 
    359 static void cat(struct entry *e, struct cfb_file *f) {
    360   int mini = e->stream_size < f->header.mini_stream_cutoff_size;
    361   struct chain *chain = sector_chain(f->fat, (mini ? f->directories : e)->starting_sector_location);
    362   struct chain *mchain = mini ? sector_chain(f->mfat, e->starting_sector_location) : NULL;
    363   dword sector = -1;
    364   byte buffer[512];
    365   for(int i = 0; i < e->stream_size; i++) {
    366     dword rr;
    367     dword q;
    368     if(mchain) {
    369       dword mq = i / 64;
    370       dword mr = i % 64;
    371       dword s = nth_chain(mchain, mq)->location;
    372       q = s / (512 / 64);
    373       dword r = s % (512 / 64);
    374       rr = (64 * r) + mr;
    375     } else {
    376       q = i / 512;
    377       dword r = i % 512;
    378       rr = r;
    379     }
    380     if(sector != q) {
    381       seek_sector(f->stream, nth_chain(chain, q)->location);
    382       size_t n = fread(buffer, sizeof(byte), 512, f->stream);
    383       assert(512 == n);
    384       sector = q;
    385     }
    386     fwrite(&buffer[rr], 1, 1, stdout);
    387   }
    388 }
    389 
    390 static size_t xconv(wchar *iname, char *oname, size_t length) { // TODO utf8
    391   int i;
    392   for(i = 0; i < length / sizeof(wchar); i++)
    393     oname[i] = iname[i];
    394   return 0;
    395 }
    396 
    397 static int walk(struct cfb_file *f, char *path, dword id, char *parent) {
    398   struct entry *e = &f->directories[id];
    399   if(e->object_type == ENTRY_STORAGE
    400      || e->object_type == ENTRY_STREAM
    401      || e->object_type == ENTRY_ROOT) {
    402     char name[32 * sizeof(wchar)];
    403     xconv(e->name, name, e->name_length);
    404     size_t len = strlen(parent) + 1 + strlen(name) + 1;
    405     char child[len];
    406     snprintf(child, len, "%s/%s", parent, name);
    407     if(path) {
    408       if(!strcmp(path, child)) {
    409         cat(e, f);
    410         return 0;
    411       }
    412     } else {
    413       if(e->object_type == ENTRY_STREAM) {
    414         printf("f %10lu %s\n", e->stream_size, child);
    415       } else {
    416         printf("d %10u %s\n", 0, child);
    417       }
    418     }
    419     dword n1 = e->left_sibling_id;
    420     if(n1 <= MAXREGSIG)
    421       if(!walk(f, path, n1, parent))
    422         return 0;
    423     dword n2 = e->child_id;
    424     if(n2 <= MAXREGSIG)
    425       if(!walk(f, path, n2, child))
    426         return 0;
    427     dword n3 = e->right_sibling_id;
    428     if(n3 <= MAXREGSIG)
    429       if(!walk(f, path, n3, parent))
    430         return 0;
    431   }
    432   return 1;
    433 }
    434 
    435 static void usage(FILE *stream) {
    436   fprintf(stream, "Usage:\n");
    437   fprintf(stream, "  cfb ls filename        list files\n");
    438   fprintf(stream, "  cfb cat filename path  write file to stdout\n");
    439   fprintf(stream, "  cfb info filename      print info\n");
    440   fprintf(stream, "  cfb --help             print help\n");
    441   fprintf(stream, "  cfb --version          print version\n");
    442 }
    443 
    444 static int cmd_ls(char *argv[]) {
    445   char *filename = argv[0];
    446   if(!filename) {
    447     usage(stderr);
    448     return 1;
    449   }
    450   struct cfb_file cfb_file;
    451   open_cfb_file(filename, &cfb_file);
    452   walk(&cfb_file, NULL, 0, "");
    453   return 0;
    454 }
    455 
    456 static int cmd_cat(char *argv[]) {
    457   char *filename = argv[0];
    458   if(!filename) {
    459     usage(stderr);
    460     return 1;
    461   }
    462   char *path = argv[1];
    463   if(!path) {
    464     usage(stderr);
    465     return 1;
    466   }
    467   struct cfb_file cfb_file;
    468   open_cfb_file(filename, &cfb_file);
    469   walk(&cfb_file, path, 0, "");
    470   return 0;
    471 }
    472 
    473 static int cmd_info(char *argv[]) {
    474   char *filename = argv[0];
    475   if(!filename) {
    476     usage(stderr);
    477     return 1;
    478   }
    479   struct cfb_file cfb_file;
    480   open_cfb_file(filename, &cfb_file);
    481   print_cfb_file(&cfb_file);
    482   return 0;
    483 }
    484 
    485 static int cmd_help(void) {
    486   usage(stdout);
    487   return 0;
    488 }
    489 
    490 static int cmd_version(void) {
    491   printf("%s\n", VERSION);
    492   return 0;
    493 }
    494 
    495 int main(int argc, char **argv) {
    496   char *cmd = *++argv;
    497   if(!cmd) {
    498     usage(stderr);
    499     return 1;
    500   }
    501   ++argv;
    502   if(!strcmp("ls", cmd)) return cmd_ls(argv);
    503   else if(!strcmp("cat", cmd)) return cmd_cat(argv);
    504   else if(!strcmp("info", cmd)) return cmd_info(argv);
    505   else if(!strcmp("--help", cmd)) return cmd_help();
    506   else if(!strcmp("--version", cmd)) return cmd_version();
    507   else usage(stderr);
    508   return 1;
    509 }