/* cfb.c (14517B) */
// TODO version 4 with bigger sector size
// TODO from pipe without seek

/*
 * cfb - list and extract streams from MS-CFB (Compound File Binary) files.
 *
 * Everything read from the input file is treated as untrusted: every
 * structural assumption is checked explicitly and a violation terminates the
 * program with a diagnostic on stderr and exit status 1.
 */

/*
 * VERSION is initialised from the text of the file named "VERSION", which is
 * expected to expand to a string-literal initializer.  When the compiler can
 * tell that the file is missing (__has_include), fall back to "unknown" so
 * that this file still builds on its own; compilers without __has_include
 * keep the unconditional #include.
 */
#if defined(__has_include)
#if !__has_include("VERSION")
#define CFB_VERSION_FILE_MISSING 1
#endif
#endif

const char *VERSION =
#ifdef CFB_VERSION_FILE_MISSING
    "unknown"
#else
#include "VERSION"
#endif
    ;

#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <inttypes.h>
#include <limits.h>

// MS-CFB Compound File Binary File Format

/* Special sector numbers stored in the FAT / DIFAT / mini FAT. */
#define UNUSED_SECTOR 0
#define MAXREGSECT 0xfffffffa
#define DIFSECT 0xfffffffc
#define FATSECT 0xfffffffd
#define ENDOFCHAIN 0xfffffffe
#define FREESECT 0xffffffff

/* Special directory entry ids. */
#define MAXREGSIG 0xfffffffa
#define NOSTREAM 0xffffffff

/* Values of entry.object_type. */
#define ENTRY_UNKNOWN 0
#define ENTRY_STORAGE 1
#define ENTRY_STREAM 2
#define ENTRY_ROOT 5

/* Fixed geometry of a version 3 compound file (see check_header()). */
enum {
    SECTOR_SIZE = 512,                   /* bytes per sector, also the header size */
    MINI_SECTOR_SIZE = 64,               /* bytes per mini stream sector */
    ENTRY_SIZE = 128,                    /* bytes per on-disk directory entry */
    HEADER_DIFAT_ENTRIES = 109,          /* DIFAT entries stored in the header */
    DWORDS_PER_SECTOR = SECTOR_SIZE / 4, /* 32-bit values per sector */
    NAME_UNITS = 32,                     /* UTF-16 code units in entry.name */
    NAME_UTF8_MAX = 3 * NAME_UNITS + 1   /* worst-case UTF-8 size incl. NUL */
};

typedef uint8_t byte;
typedef uint16_t ushort;
typedef uint16_t wchar;
typedef uint32_t dword;
typedef uint64_t filetime;
typedef uint64_t ulonglong;

/* One 128-byte directory entry, decoded into host byte order. */
struct entry {
    wchar name[32];
    ushort name_length; /* bytes, as stored in the file (untrusted) */
    byte object_type;
    byte color_flag;
    dword left_sibling_id;
    dword right_sibling_id;
    dword child_id;
    byte clsid[16];
    dword state_bits;
    filetime creation_time;
    filetime modified_time;
    dword starting_sector_location;
    ulonglong stream_size;
};

/* Singly linked list of sector numbers making up one stream. */
struct chain {
    dword location;
    struct chain *next;
};

/* The first 76 bytes of the file, decoded into host byte order. */
struct header {
    byte signature[8];
    byte clsid[16];
    ushort minor_version;
    ushort major_version;
    ushort byte_order;
    ushort sector_shift;
    ushort mini_sector_shift;
    byte reserved[6];
    dword number_of_directory_sectors;
    dword number_of_fat_sectors;
    dword first_directory_sector_location;
    dword transaction_signature_number;
    dword mini_stream_cutoff_size;
    dword first_mini_fat_sector_location;
    dword number_of_mini_fat_sectors;
    dword first_difat_sector_location;
    dword number_of_difat_sectors;
};

/* Everything open_cfb_file() loads; released by close_cfb_file(). */
struct cfb_file {
    FILE *stream;
    dword sector_count; /* sectors that fit in the file after the header */
    struct header header;
    dword difat_length;
    dword *difat;
    dword fat_length;
    dword *fat;
    struct chain *directory_chain;
    dword directories_length;
    struct entry *directories;
    byte *visited; /* one flag per directory entry, set by walk() */
    struct chain *mfat_chain;
    dword mfat_length;
    dword *mfat;
};

/* Print a diagnostic and terminate with exit status 1. */
static void die(const char *message) {
    fprintf(stderr, "cfb: %s\n", message);
    exit(1);
}

/* Terminate with `message` unless `ok` is non-zero. */
static void require(int ok, const char *message) {
    if (!ok)
        die(message);
}

/* calloc() that never returns NULL (a zero count still yields a valid pointer). */
static void *xcalloc(size_t count, size_t size) {
    void *p = calloc(count ? count : 1, size);
    if (!p)
        die("out of memory");
    return p;
}

/* malloc() that never returns NULL. */
static void *xmalloc(size_t size) {
    void *p = malloc(size ? size : 1);
    if (!p)
        die("out of memory");
    return p;
}

/*
 * Position `stream` at the start of sector `location`.  Sector 0 begins right
 * after the 512-byte header.  The offset is computed in 64 bits because
 * (location + 1) * 512 does not fit in 32 bits for large sector numbers.
 */
static void seek_sector(FILE *stream, dword location) {
    uint64_t offset = ((uint64_t)location + 1) * SECTOR_SIZE;
    if (offset > (uint64_t)LONG_MAX)
        die("sector offset is beyond what fseek() can address");
    if (fseek(stream, (long)offset, SEEK_SET) != 0)
        die("seek failed");
}

/* Read exactly `count` bytes into `place` or terminate. */
static void read_byte(FILE *stream, byte *place, size_t count) {
    if (fread(place, sizeof(byte), count, stream) != count)
        die("unexpected end of file or read error");
}

/*
 * Read one little-endian unsigned integer of `width` bytes (at most 8).
 * Assembling the value byte by byte keeps the result independent of the
 * host's byte order.
 */
static uint64_t read_le(FILE *stream, size_t width) {
    byte raw[8];
    uint64_t value = 0;
    read_byte(stream, raw, width);
    for (size_t i = width; i-- > 0;)
        value = (value << 8) | raw[i];
    return value;
}

/* Read `count` UTF-16LE code units. */
static void read_wchar(FILE *stream, wchar *place, size_t count) {
    for (size_t i = 0; i < count; i++)
        place[i] = (wchar)read_le(stream, sizeof(wchar));
}

/* Read `count` little-endian 16-bit values. */
static void read_ushort(FILE *stream, ushort *place, size_t count) {
    for (size_t i = 0; i < count; i++)
        place[i] = (ushort)read_le(stream, sizeof(ushort));
}

/* Read `count` little-endian 32-bit values. */
static void read_dword(FILE *stream, dword *place, size_t count) {
    for (size_t i = 0; i < count; i++)
        place[i] = (dword)read_le(stream, sizeof(dword));
}

/* Read `count` little-endian 64-bit values. */
static void read_ulonglong(FILE *stream, ulonglong *place, size_t count) {
    for (size_t i = 0; i < count; i++)
        place[i] = read_le(stream, sizeof(ulonglong));
}

/* Read `count` 64-bit timestamps (same encoding as read_ulonglong). */
static void read_filetime(FILE *stream, filetime *place, size_t count) {
    read_ulonglong(stream, place, count);
}

/* Decode the header fields, in file order, from the current position. */
static void read_header(FILE *stream, struct header *x) {
    read_byte(stream, x->signature, 8);
    read_byte(stream, x->clsid, 16);
    read_ushort(stream, &x->minor_version, 1);
    read_ushort(stream, &x->major_version, 1);
    read_ushort(stream, &x->byte_order, 1);
    read_ushort(stream, &x->sector_shift, 1);
    read_ushort(stream, &x->mini_sector_shift, 1);
    read_byte(stream, x->reserved, 6);
    read_dword(stream, &x->number_of_directory_sectors, 1);
    read_dword(stream, &x->number_of_fat_sectors, 1);
    read_dword(stream, &x->first_directory_sector_location, 1);
    read_dword(stream, &x->transaction_signature_number, 1);
    read_dword(stream, &x->mini_stream_cutoff_size, 1);
    read_dword(stream, &x->first_mini_fat_sector_location, 1);
    read_dword(stream, &x->number_of_mini_fat_sectors, 1);
    read_dword(stream, &x->first_difat_sector_location, 1);
    read_dword(stream, &x->number_of_difat_sectors, 1);
}

/*
 * Reject anything that is not a version 3 file with 512-byte sectors and
 * 64-byte mini sectors.  The header clsid, first_directory_sector_location
 * and first_mini_fat_sector_location are intentionally not validated here.
 * The shifts are compared directly instead of evaluating 1 << shift, which
 * would be undefined for a hostile shift count.
 */
static void check_header(struct header *x) {
    static const byte expected_signature[8] =
        {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
    static const byte expected_reserved[6] = {0, 0, 0, 0, 0, 0};

    require(!memcmp(x->signature, expected_signature, 8),
            "not a compound file (bad signature)");
    require(0xfffe == x->byte_order, "unsupported byte order mark");
    require(!memcmp(x->reserved, expected_reserved, 6),
            "reserved header bytes are not zero");
    require(3 == x->major_version, "only major version 3 is supported");
    require(9 == x->sector_shift, "sector size is not 512 bytes");
    require(6 == x->mini_sector_shift, "mini sector size is not 64 bytes");
    require(0 == x->number_of_directory_sectors,
            "number of directory sectors is not zero");
    require(0 == x->transaction_signature_number,
            "transaction signature number is not zero");
    require(4096 == x->mini_stream_cutoff_size,
            "mini stream cutoff size is not 4096");
    if (0 == x->number_of_difat_sectors)
        require(ENDOFCHAIN == x->first_difat_sector_location,
                "DIFAT start is set although there are no DIFAT sectors");
}

/* Decode one 128-byte directory entry from the current position. */
static void read_entry(FILE *stream, struct entry *entry) {
    read_wchar(stream, &entry->name[0], 32);
    read_ushort(stream, &entry->name_length, 1);
    read_byte(stream, &entry->object_type, 1);
    read_byte(stream, &entry->color_flag, 1);
    read_dword(stream, &entry->left_sibling_id, 1);
    read_dword(stream, &entry->right_sibling_id, 1);
    read_dword(stream, &entry->child_id, 1);
    read_byte(stream, &entry->clsid[0], 16);
    read_dword(stream, &entry->state_bits, 1);
    read_filetime(stream, &entry->creation_time, 1);
    read_filetime(stream, &entry->modified_time, 1);
    read_dword(stream, &entry->starting_sector_location, 1);
    read_ulonglong(stream, &entry->stream_size, 1);
}

/* Print `count` bytes as colon-separated two-digit hex. */
static void print_bytes(byte *place, int count) {
    for (int i = 0; i < count; i++)
        printf("%s%02x", (0 < i ? ":" : ""), (unsigned)place[i]);
}

/* Print a 16-bit value as "decimal 0xhex". */
static void print_ushort(ushort x) {
    printf("%u 0x%04x", (unsigned)x, (unsigned)x);
}

/* Print a 32-bit value as "decimal 0xhex". */
static void print_dword(dword x) {
    printf("%" PRIu32 " 0x%08" PRIx32, x, x);
}

/*
 * Number of whole or partial sectors that follow the header, derived from the
 * file size.  Leaves the stream positioned at offset 0.  Used only as an
 * upper bound for counts taken from the header.
 */
static dword count_sectors(FILE *stream) {
    if (fseek(stream, 0L, SEEK_END) != 0)
        die("seek failed");
    long size = ftell(stream);
    if (size < 0)
        die("unable to determine the file size");
    if (fseek(stream, 0L, SEEK_SET) != 0)
        die("seek failed");
    if (size <= SECTOR_SIZE)
        return 0;
    uint64_t sectors =
        ((uint64_t)size - SECTOR_SIZE + (SECTOR_SIZE - 1)) / SECTOR_SIZE;
    return sectors > UINT32_MAX ? UINT32_MAX : (dword)sectors;
}

/*
 * Load the DIFAT: 109 entries from the header (the stream must be positioned
 * right after the fixed header fields) followed by 127 entries from every
 * DIFAT sector; the last dword of a DIFAT sector links to the next one.
 *
 * The walk is bounded by header.number_of_difat_sectors so that an over-long
 * or cyclic chain cannot write past the allocation, and that count is itself
 * bounded by the number of sectors the file can hold.  Slots that were never
 * filled are set to FREESECT so read_fat() skips them.
 */
static void read_difat(FILE *stream, struct cfb_file *x) {
    const dword per_sector = DWORDS_PER_SECTOR - 1;
    dword sectors = x->header.number_of_difat_sectors;

    require(sectors <= x->sector_count,
            "header declares more DIFAT sectors than the file can hold");
    uint64_t length = HEADER_DIFAT_ENTRIES + (uint64_t)per_sector * sectors;
    require(length <= UINT32_MAX, "DIFAT is too large");

    x->difat_length = (dword)length;
    x->difat = xcalloc(x->difat_length, sizeof(dword));
    read_dword(stream, x->difat, HEADER_DIFAT_ENTRIES);

    dword next = x->header.first_difat_sector_location;
    dword filled = HEADER_DIFAT_ENTRIES;
    for (dword k = 0; k < sectors && next != ENDOFCHAIN; k++) {
        require(next <= MAXREGSECT, "invalid DIFAT sector location");
        seek_sector(stream, next);
        read_dword(stream, &x->difat[filled], per_sector);
        read_dword(stream, &next, 1);
        filled += per_sector;
    }
    require(next > MAXREGSECT,
            "DIFAT chain is longer than the header declares");
    for (; filled < x->difat_length; filled++)
        x->difat[filled] = FREESECT;
}

/*
 * Load the FAT.  DIFAT slot i supplies FAT entries [128 * i, 128 * i + 128).
 * Slots marked FREESECT are left zero-filled; sector_chain() refuses to step
 * into them, so the zeros are never interpreted as sector numbers.
 */
static void read_fat(FILE *stream, struct cfb_file *x) {
    const dword per_sector = DWORDS_PER_SECTOR;
    uint64_t length = (uint64_t)per_sector * x->difat_length;

    require(length <= UINT32_MAX, "FAT is too large");
    x->fat_length = (dword)length;
    x->fat = xcalloc(x->fat_length, sizeof(dword));

    for (dword i = 0; i < x->difat_length; i++) {
        dword location = x->difat[i];
        if (location == FREESECT)
            continue;
        require(location <= MAXREGSECT, "invalid FAT sector location");
        seek_sector(stream, location);
        read_dword(stream, &x->fat[i * per_sector], per_sector);
    }
}

/* Allocate one chain node. */
static struct chain *make_chain(dword location, struct chain *next) {
    struct chain *x = xmalloc(sizeof *x);
    x->location = location;
    x->next = next;
    return x;
}

/* Release every node of a chain; NULL is accepted. */
static void free_chain(struct chain *x) {
    while (x) {
        struct chain *next = x->next;
        free(x);
        x = next;
    }
}

/* Number of nodes in a chain. */
static dword chain_length(struct chain *x) {
    dword n = 0;
    for (; x; x = x->next)
        n++;
    return n;
}

/* Node number `n` (0-based) of a chain, or NULL when the chain is shorter. */
static struct chain *nth_chain(struct chain *chain, dword n) {
    while (chain && n > 0) {
        chain = chain->next;
        n--;
    }
    return chain;
}

/*
 * Follow an allocation table from `location` until a marker value and return
 * the visited sector numbers as a newly allocated chain (NULL when `location`
 * is itself a marker).  The caller owns the chain; release with free_chain().
 *
 *   table, table_length  the FAT or the mini FAT and its entry count
 *   map                  the DIFAT when `table` is the FAT, so that parts of
 *                        the FAT that were never loaded are rejected; NULL
 *                        for the mini FAT, which has no holes
 *
 * Iterative, bounds-checked and limited to table_length steps, so a corrupt
 * table can neither read out of bounds nor loop forever.
 */
static struct chain *sector_chain(const dword *table, dword table_length,
                                  const dword *map, dword location) {
    struct chain *head = NULL;
    struct chain **link = &head;
    dword steps = 0;

    while (location <= MAXREGSECT) {
        require(location < table_length,
                "sector chain points outside the allocation table");
        require(!map || map[location / DWORDS_PER_SECTOR] != FREESECT,
                "sector chain enters a part of the FAT the DIFAT does not map");
        require(steps < table_length, "sector chain contains a cycle");
        steps++;
        *link = make_chain(location, NULL);
        link = &(*link)->next;
        location = table[location];
    }
    /* 0xfffffffb lies between MAXREGSECT and DIFSECT and is not a valid marker. */
    require(location >= DIFSECT, "sector chain ends with a reserved value");
    return head;
}

/* Load every directory entry (four per sector) along directory_chain. */
static void read_directories(FILE *stream, struct cfb_file *x) {
    const dword per_sector = SECTOR_SIZE / ENTRY_SIZE;
    uint64_t length = (uint64_t)per_sector * chain_length(x->directory_chain);

    require(length <= UINT32_MAX, "directory is too large");
    x->directories_length = (dword)length;
    x->directories = xcalloc(x->directories_length, sizeof(struct entry));

    dword i = 0;
    for (struct chain *c = x->directory_chain; c; c = c->next) {
        seek_sector(stream, c->location);
        for (dword j = 0; j < per_sector; j++)
            read_entry(stream, &x->directories[i++]);
    }
}

/* Load the mini FAT (128 entries per sector) along mfat_chain. */
static void read_mfat(FILE *stream, struct cfb_file *x) {
    const dword per_sector = DWORDS_PER_SECTOR;
    uint64_t length = (uint64_t)per_sector * chain_length(x->mfat_chain);

    require(length <= UINT32_MAX, "mini FAT is too large");
    x->mfat_length = (dword)length;
    x->mfat = xcalloc(x->mfat_length, sizeof(dword));

    dword i = 0;
    for (struct chain *c = x->mfat_chain; c; c = c->next) {
        seek_sector(stream, c->location);
        read_dword(stream, &x->mfat[i++ * per_sector], per_sector);
    }
}

/*
 * Open `filename` and load header, DIFAT, FAT, directory and mini FAT into
 * `x`.  Terminates the program on any error.  Release with close_cfb_file().
 */
static void open_cfb_file(char *filename, struct cfb_file *x) {
    memset(x, 0, sizeof *x);

    /* "rb": the file is binary; text mode would corrupt it on some systems. */
    FILE *stream = fopen(filename, "rb");
    if (!stream) {
        fprintf(stderr, "Unable to open '%s'.\n", filename);
        exit(1);
    }
    x->stream = stream;
    x->sector_count = count_sectors(stream);
    read_header(stream, &x->header);
    check_header(&x->header);
    read_difat(stream, x);
    read_fat(stream, x);
    x->directory_chain =
        sector_chain(x->fat, x->fat_length, x->difat,
                     x->header.first_directory_sector_location);
    read_directories(stream, x);
    x->visited = xcalloc(x->directories_length, sizeof(byte));
    x->mfat_chain =
        sector_chain(x->fat, x->fat_length, x->difat,
                     x->header.first_mini_fat_sector_location);
    read_mfat(stream, x);
}

/* Release everything open_cfb_file() acquired and clear `x`. */
static void close_cfb_file(struct cfb_file *x) {
    free_chain(x->directory_chain);
    free_chain(x->mfat_chain);
    free(x->difat);
    free(x->fat);
    free(x->directories);
    free(x->visited);
    free(x->mfat);
    if (x->stream)
        fclose(x->stream);
    memset(x, 0, sizeof *x);
}

/* Print the header fields and the sizes of the loaded tables to stdout. */
static void print_cfb_file(struct cfb_file *x) {
    printf("signature ");
    print_bytes(x->header.signature, 8);
    printf("\nclsid ");
    print_bytes(x->header.clsid, 16);
    printf("\nminor_version ");
    print_ushort(x->header.minor_version);
    printf("\nmajor_version ");
    print_ushort(x->header.major_version);
    printf("\nbyte_order ");
    print_ushort(x->header.byte_order);
    printf("\nsector_shift ");
    print_ushort(x->header.sector_shift);
    printf("\nmini_sector_shift ");
    print_ushort(x->header.mini_sector_shift);
    printf("\nreserved ");
    print_bytes(x->header.reserved, 6);
    printf("\nnumber_of_directory_sectors ");
    print_dword(x->header.number_of_directory_sectors);
    printf("\nnumber_of_fat_sectors ");
    print_dword(x->header.number_of_fat_sectors);
    printf("\nfirst_directory_sector_location ");
    print_dword(x->header.first_directory_sector_location);
    printf("\ntransaction_signature_number ");
    print_dword(x->header.transaction_signature_number);
    printf("\nmini_stream_cutoff_size ");
    print_dword(x->header.mini_stream_cutoff_size);
    printf("\nfirst_mini_fat_sector_location ");
    print_dword(x->header.first_mini_fat_sector_location);
    printf("\nnumber_of_mini_fat_sectors ");
    print_dword(x->header.number_of_mini_fat_sectors);
    printf("\nfirst_difat_sector_location ");
    print_dword(x->header.first_difat_sector_location);
    printf("\nnumber_of_difat_sectors ");
    print_dword(x->header.number_of_difat_sectors);
    printf("\ndifat_length ");
    print_dword(x->difat_length);
    printf("\nfat_length ");
    print_dword(x->fat_length);
    printf("\ndirectories_length %" PRIu32, x->directories_length);
    printf("\nmfat_length %" PRIu32, x->mfat_length);
    printf("\n");
}

/*
 * Size in bytes of the stream described by `e`.  In version 3 files the upper
 * 32 bits of the on-disk field may hold garbage written by old
 * implementations; MS-CFB recommends ignoring them.
 */
static ulonglong entry_stream_size(const struct cfb_file *f,
                                   const struct entry *e) {
    if (f->header.major_version == 3)
        return e->stream_size & 0xffffffffu;
    return e->stream_size;
}

/* Read one full sector into `buffer` (SECTOR_SIZE bytes). */
static void load_sector(FILE *stream, dword location, byte *buffer) {
    seek_sector(stream, location);
    read_byte(stream, buffer, SECTOR_SIZE);
}

/* Write `count` bytes to stdout or terminate. */
static void write_out(const byte *data, size_t count) {
    if (fwrite(data, 1, count, stdout) != count)
        die("write error");
}

/*
 * Copy the contents of the stream described by `e` to stdout.
 *
 * Streams below the mini stream cutoff are stored in 64-byte mini sectors
 * that live inside the root entry's stream; larger streams occupy regular
 * sectors directly.  The root entry's own stream is the mini stream
 * container and is always read from regular sectors, whatever its size.
 */
static void cat(struct entry *e, struct cfb_file *f) {
    uint64_t remaining = entry_stream_size(f, e);
    if (remaining == 0)
        return;

    int mini = e->object_type != ENTRY_ROOT
        && remaining < f->header.mini_stream_cutoff_size;
    struct chain *chain =
        sector_chain(f->fat, f->fat_length, f->difat,
                     (mini ? f->directories : e)->starting_sector_location);
    struct chain *mchain = mini
        ? sector_chain(f->mfat, f->mfat_length, NULL,
                       e->starting_sector_location)
        : NULL;
    byte buffer[SECTOR_SIZE];

    if (mini) {
        const dword per_sector = SECTOR_SIZE / MINI_SECTOR_SIZE;
        int loaded = 0;
        dword cached = 0; /* container chain index currently in buffer */

        for (struct chain *m = mchain; remaining > 0; m = m->next) {
            require(m != NULL, "mini sector chain is shorter than the stream");
            dword index = m->location / per_sector;
            size_t offset =
                (size_t)(m->location % per_sector) * MINI_SECTOR_SIZE;
            /* Neighbouring mini sectors usually share a container sector. */
            if (!loaded || index != cached) {
                struct chain *c = nth_chain(chain, index);
                require(c != NULL, "mini sector lies outside the mini stream");
                load_sector(f->stream, c->location, buffer);
                cached = index;
                loaded = 1;
            }
            size_t n = remaining < MINI_SECTOR_SIZE
                ? (size_t)remaining : (size_t)MINI_SECTOR_SIZE;
            write_out(buffer + offset, n);
            remaining -= n;
        }
    } else {
        for (struct chain *c = chain; remaining > 0; c = c->next) {
            require(c != NULL, "sector chain is shorter than the stream");
            load_sector(f->stream, c->location, buffer);
            size_t n = remaining < SECTOR_SIZE
                ? (size_t)remaining : (size_t)SECTOR_SIZE;
            write_out(buffer, n);
            remaining -= n;
        }
    }

    free_chain(mchain);
    free_chain(chain);
}

/*
 * Convert a directory entry name from UTF-16 to NUL-terminated UTF-8.
 *
 *   iname   the 32 code units of entry.name
 *   oname   output buffer of at least NAME_UTF8_MAX bytes
 *   length  entry.name_length: the name's size in bytes as stored in the
 *           file; it is untrusted and therefore clamped to the array size
 *
 * Conversion stops at the first NUL code unit.  An unpaired surrogate is
 * replaced by U+FFFD.  Returns the number of bytes written, excluding the
 * terminating NUL.
 */
static size_t xconv(const wchar *iname, char *oname, size_t length) {
    size_t units = length / sizeof(wchar);
    size_t out = 0;

    if (units > NAME_UNITS)
        units = NAME_UNITS;
    for (size_t i = 0; i < units && iname[i] != 0; i++) {
        uint32_t cp = iname[i];
        if (cp >= 0xd800 && cp <= 0xdbff && i + 1 < units
            && iname[i + 1] >= 0xdc00 && iname[i + 1] <= 0xdfff) {
            cp = 0x10000 + ((cp - 0xd800) << 10)
                + ((uint32_t)iname[i + 1] - 0xdc00);
            i++;
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
            cp = 0xfffd;
        }
        if (cp < 0x80) {
            oname[out++] = (char)cp;
        } else if (cp < 0x800) {
            oname[out++] = (char)(0xc0 | (cp >> 6));
            oname[out++] = (char)(0x80 | (cp & 0x3f));
        } else if (cp < 0x10000) {
            oname[out++] = (char)(0xe0 | (cp >> 12));
            oname[out++] = (char)(0x80 | ((cp >> 6) & 0x3f));
            oname[out++] = (char)(0x80 | (cp & 0x3f));
        } else {
            oname[out++] = (char)(0xf0 | (cp >> 18));
            oname[out++] = (char)(0x80 | ((cp >> 12) & 0x3f));
            oname[out++] = (char)(0x80 | ((cp >> 6) & 0x3f));
            oname[out++] = (char)(0x80 | (cp & 0x3f));
        }
    }
    oname[out] = '\0';
    return out;
}

/*
 * Visit directory entry `id` and then its left sibling, its child and its
 * right sibling, in that order.
 *
 *   path == NULL  print one line per storage/stream entry ("ls")
 *   path != NULL  write the entry whose full name equals `path` to stdout
 *                 ("cat") and stop
 *
 * `parent` is the full name of the containing storage ("" above the root).
 * Returns 0 when `path` was found and written, 1 otherwise.  Ids outside the
 * directory and entries that were already visited are skipped, so corrupt
 * sibling/child links can neither index out of bounds nor recurse forever.
 */
static int walk(struct cfb_file *f, const char *path, dword id,
                const char *parent) {
    if (id >= f->directories_length || f->visited[id])
        return 1;
    f->visited[id] = 1;

    struct entry *e = &f->directories[id];
    if (e->object_type != ENTRY_STORAGE
        && e->object_type != ENTRY_STREAM
        && e->object_type != ENTRY_ROOT)
        return 1;

    char name[NAME_UTF8_MAX];
    xconv(e->name, name, e->name_length);
    size_t len = strlen(parent) + 1 + strlen(name) + 1;
    char *child = xmalloc(len);
    snprintf(child, len, "%s/%s", parent, name);

    int searching = 1;
    if (path) {
        if (strcmp(path, child) == 0) {
            cat(e, f);
            searching = 0;
        }
    } else if (e->object_type == ENTRY_STREAM) {
        printf("f %10" PRIu64 " %s\n", entry_stream_size(f, e), child);
    } else {
        printf("d %10u %s\n", 0u, child);
    }

    if (searching)
        searching = walk(f, path, e->left_sibling_id, parent);
    if (searching)
        searching = walk(f, path, e->child_id, child);
    if (searching)
        searching = walk(f, path, e->right_sibling_id, parent);

    free(child);
    return searching;
}

/* Print the command summary to `stream`. */
static void usage(FILE *stream) {
    fprintf(stream, "Usage:\n");
    fprintf(stream, " cfb ls filename list files\n");
    fprintf(stream, " cfb cat filename path write file to stdout\n");
    fprintf(stream, " cfb info filename print info\n");
    fprintf(stream, " cfb --help print help\n");
    fprintf(stream, " cfb --version print version\n");
}

/* "cfb ls filename": list every storage and stream. */
static int cmd_ls(char *argv[]) {
    char *filename = argv[0];
    if (!filename) {
        usage(stderr);
        return 1;
    }
    struct cfb_file cfb_file;
    open_cfb_file(filename, &cfb_file);
    walk(&cfb_file, NULL, 0, "");
    close_cfb_file(&cfb_file);
    return 0;
}

/*
 * "cfb cat filename path": write one stream to stdout.  Returns 1 when the
 * path does not name an entry in the file.
 */
static int cmd_cat(char *argv[]) {
    char *filename = argv[0];
    if (!filename) {
        usage(stderr);
        return 1;
    }
    char *path = argv[1];
    if (!path) {
        usage(stderr);
        return 1;
    }
    struct cfb_file cfb_file;
    open_cfb_file(filename, &cfb_file);
    int missing = walk(&cfb_file, path, 0, "");
    close_cfb_file(&cfb_file);
    if (missing) {
        fprintf(stderr, "cfb: '%s' not found in '%s'\n", path, filename);
        return 1;
    }
    if (fflush(stdout) != 0)
        die("write error");
    return 0;
}

/* "cfb info filename": print header fields and table sizes. */
static int cmd_info(char *argv[]) {
    char *filename = argv[0];
    if (!filename) {
        usage(stderr);
        return 1;
    }
    struct cfb_file cfb_file;
    open_cfb_file(filename, &cfb_file);
    print_cfb_file(&cfb_file);
    close_cfb_file(&cfb_file);
    return 0;
}

/* "cfb --help". */
static int cmd_help(void) {
    usage(stdout);
    return 0;
}

/* "cfb --version". */
static int cmd_version(void) {
    printf("%s\n", VERSION);
    return 0;
}

/* Dispatch on the first argument; argv is NULL-terminated, so argc is unused. */
int main(int argc, char **argv) {
    (void)argc;
    char *cmd = *++argv;
    if (!cmd) {
        usage(stderr);
        return 1;
    }
    ++argv;
    if (!strcmp("ls", cmd))
        return cmd_ls(argv);
    else if (!strcmp("cat", cmd))
        return cmd_cat(argv);
    else if (!strcmp("info", cmd))
        return cmd_info(argv);
    else if (!strcmp("--help", cmd))
        return cmd_help();
    else if (!strcmp("--version", cmd))
        return cmd_version();
    else
        usage(stderr);
    return 1;
}