ppt.c (12003B)
1 // TODO proper little endian read/write 2 // TODO pic in place instead of appended 3 // TODO limit mem like timeout 4 5 const char *VERSION = 6 #include "VERSION" 7 ; 8 9 #include <stdlib.h> 10 #include <stdio.h> 11 #include <string.h> 12 #include <errno.h> 13 #include <unistd.h> 14 #include <stdint.h> 15 #include <assert.h> 16 #include <limits.h> 17 18 enum cmd {CMD_LS, CMD_CAT, CMD_TXT, CMD_HTML, CMD_EXTRACT}; 19 20 typedef uint8_t byte; 21 typedef uint32_t dword; 22 23 // MS-PPT PowerPoint (.ppt) Binary File Format 24 25 struct RecordHeader { 26 ushort recVer: 4; //(logand #x0f %dummy1)) 27 ushort recInstance: 12; //(logior (ash %dummy2 4) (ash %dummy1 -4))) 28 ushort recType; 29 dword recLen; 30 } __attribute__((__packed__)); 31 32 struct in { 33 FILE *stream; 34 long size; 35 char *buf; 36 long max; 37 long pos; 38 }; 39 40 static long in_tell(struct in *in) { 41 if(in->buf) { 42 return in->pos; 43 } 44 long z = ftell(in->stream); 45 if(z < 0) { 46 fprintf(stderr, "stream not seekable\n"); 47 exit(1); 48 } 49 return z; 50 } 51 52 #define MB (1024 * 1024) 53 54 static void in_open(struct in *in, char *filename) { 55 in->stream = filename ? fopen(filename, "r") : stdin; 56 if(!in->stream) { 57 fprintf(stderr, "Unable to open '%s'.\n", filename); 58 exit(1); 59 } 60 in->size = 0; 61 in->buf = NULL; 62 in->max = 0; 63 in->pos = 0; 64 if(ftell(in->stream) < 0) { /* not seekable */ 65 in->size = MB; 66 in->buf = malloc(in->size); 67 in->max = 0; 68 while(!feof(in->stream)) { 69 if(in->size <= in->max) { 70 in->size += MB; 71 in->buf = realloc(in->buf, in->size); 72 } 73 size_t n = fread(&in->buf[in->max], 1, in->size - in->max, in->stream); 74 if(n < 0) { 75 fprintf(stderr, "unexpected end of file\n"); 76 exit(1); 77 } 78 in->max += n; 79 } 80 } 81 } 82 83 static void in_close(struct in *in) { 84 free(in->buf); 85 fclose(in->stream); 86 } 87 88 static size_t in_read(void *ptr, size_t size, size_t nmemb, struct in *in) { 89 if(in->buf) { 90 size_t n1 = size * nmemb; 91 size_t n2 = n1 <= in->max - in->pos ? n1 : in->max - in->pos; 92 memcpy(ptr, &in->buf[in->pos], n2); 93 in->pos += n2; 94 if(n2 % size) { 95 fprintf(stderr, "partial data read\n"); 96 exit(1); 97 } 98 return n2 / size; 99 } 100 return fread(ptr, size, nmemb, in->stream); 101 } 102 103 static void in_seek(struct in *in, long offset) { 104 int z = 1; 105 if(in->buf) { 106 if(offset <= in->max) { 107 in->pos = offset; 108 return; 109 } 110 } else { 111 z = fseek(in->stream, offset, SEEK_SET); 112 } 113 if(z) { 114 fprintf(stderr, "seek to %ld failed\n", offset); 115 exit(1); 116 } 117 } 118 119 static size_t read_RecordHeader(struct in *in, struct RecordHeader *x) { 120 return in_read(x, sizeof(struct RecordHeader), 1, in); 121 } 122 123 static size_t write_RecordHeader(struct RecordHeader *x) { 124 return fwrite(x, sizeof(struct RecordHeader), 1, stdout); 125 } 126 127 static void cat(struct in *in, FILE *out, dword n) { 128 for(dword i = 0; i < n;) { 129 char buf[4096]; 130 dword m = n - i; 131 size_t n1 = in_read(buf, 1, m <= 4096 ? m : 4096, in); 132 if(n1 <= 0) { 133 fprintf(stderr, "unexpected end of file\n"); 134 exit(1); 135 } 136 size_t n2 = fwrite(buf, 1, n1, out); 137 if(n1 != n2) { 138 fprintf(stderr, "output failed\n"); 139 exit(1); 140 } 141 i += n1; 142 } 143 } 144 145 static void utf8txt(uint16_t c) { 146 if(c <= 0) return; 147 if(c < 0x80) { // 0xxxxxxx 148 switch(c) { 149 case '\r': puts(""); break; 150 default: putchar(c); 151 } 152 } else if(c < 0x800) { // 110xxxxx 10xxxxxx 153 putchar(0xc0 | (c >> 6)); 154 putchar(0x80 | (0x3f & c)); 155 } else if(c < 0x10000) { // 1110xxxx 10xxxxxx 10xxxxxx 156 putchar(0xe0 | (c >> 12)); 157 putchar(0x80 | (0x3f & (c >> 6))); 158 putchar(0x80 | (0x3f & c)); 159 } 160 } 161 162 static void txt(struct RecordHeader *h, struct in *in) { 163 switch(h->recType) { 164 case 0x0fa0: // RT_TextCharsAtom utf16le 165 case 0x0fba: // RT_CString 166 for(int i = 0; i < h->recLen; i += 2) { 167 uint16_t c; 168 if(1 != in_read(&c, 2, 1, in)) { 169 fprintf(stderr, "unexpected end of file\n"); 170 exit(1); 171 } 172 utf8txt(c); 173 } 174 puts(""); 175 break; 176 case 0x0fa8: // RT_TextBytesAtom ascii 177 for(int i = 0; i < h->recLen; i++) { 178 uint8_t c; 179 if(1 != in_read(&c, 1, 1, in)) { 180 fprintf(stderr, "unexpected end of file\n"); 181 exit(1); 182 } 183 utf8txt(c); 184 } 185 puts(""); 186 break; 187 case 0x03ee: // RT_Slide 188 case 0x03e8: // RT_Document 189 { 190 static int slide = 0; 191 if(0 < slide) puts(""); 192 slide++; 193 } 194 } 195 } 196 197 static void utf8html(uint16_t c) { 198 if(c <= 0) return; 199 if(c < 0x80) { // 0xxxxxxx 200 switch(c) { 201 case '&': printf("&"); break; 202 case '<': printf("<"); break; 203 case '>': printf(">"); break; 204 case '\'': printf("""); break; 205 case '\r': puts("<br>"); break; 206 default: putchar(c); 207 } 208 } else if(c < 0x800) { // 110xxxxx 10xxxxxx 209 putchar(0xc0 | (c >> 6)); 210 putchar(0x80 | (0x3f & c)); 211 } else if(c < 0x10000) { // 1110xxxx 10xxxxxx 10xxxxxx 212 putchar(0xe0 | (c >> 12)); 213 putchar(0x80 | (0x3f & (c >> 6))); 214 putchar(0x80 | (0x3f & c)); 215 } 216 } 217 218 static void html(struct RecordHeader *h, struct in *in) { 219 switch(h->recType) { 220 case 0x0fa0: // RT_TextCharsAtom utf16le 221 case 0x0fba: // RT_CString 222 printf("<p>"); 223 for(int i = 0; i < h->recLen; i += 2) { 224 uint16_t c; 225 if(1 != in_read(&c, 2, 1, in)) { 226 fprintf(stderr, "unexpected end of file\n"); 227 exit(1); 228 } 229 utf8html(c); 230 } 231 puts("</p>"); 232 break; 233 case 0x0fa8: // RT_TextBytesAtom ascii 234 printf("<p>"); 235 for(int i = 0; i < h->recLen; i++) { 236 uint8_t c; 237 if(1 != in_read(&c, 1, 1, in)) { 238 fprintf(stderr, "unexpected end of file\n"); 239 exit(1); 240 } 241 utf8html(c); 242 } 243 puts("</p>"); 244 break; 245 case 0x03ee: // RT_Slide 246 case 0x03e8: // RT_Document 247 { 248 static int slide = 0; 249 if(0 < slide) puts("<hr/>"); 250 slide++; 251 printf("<h1>Slide %d</h1>\n", slide); 252 } 253 } 254 } 255 256 // MS-ODRAW Office Drawing Binary File Format 257 258 struct POINT { 259 dword x; 260 dword y; 261 } __attribute__((__packed__)); 262 263 struct RECT { 264 dword left; 265 dword top; 266 dword right; 267 dword bottom; 268 } __attribute__((__packed__)); 269 270 struct OfficeArtMetafileHeader { 271 dword cbSize; 272 struct RECT rcBounds; 273 struct POINT ptSize; 274 dword cbSave; 275 byte compression; // :member '(#x00 #xfe)) 276 byte filter; //:always #xfe)) 277 } __attribute__((__packed__)); 278 279 static size_t read_OfficeArtMetafileHeader(struct in *in, 280 struct OfficeArtMetafileHeader *x) { 281 return in_read(x, 1, sizeof(struct OfficeArtMetafileHeader), in); 282 } 283 284 static size_t read_guid(struct in *in, byte guid[]) { 285 return in_read(guid, 1, 16, in); 286 } 287 288 static const struct OfficeArtBlip_config { 289 ushort recType; 290 ushort recInstance[4]; 291 char *ext; 292 ushort guid2[2]; 293 int metafileHeader; 294 } OfficeArtBlip_config[] = { 295 {0xf01a, {0x3d4, 0x3d5, 0, 0}, "emf", {0x3d5, 0}, 1}, 296 {0xf01b, {0x216, 0x217, 0, 0}, "wmf", {0x217, 0}, 1}, 297 {0xf01c, {0x542, 0x543, 0, 0}, "pict", {0x543, 0}, 1}, 298 {0xf01d, {0x46a, 0x46b, 0x6e2, 0x6e3}, "jpeg", {0x46b, 0x6e3}, 0}, 299 {0xf01e, {0x6e0, 0x6e1, 0, 0}, "png", {0x6e1, 0}, 0}, 300 {0xf01f, {0x7a8, 0x7a9, 0, 0}, "dib", {0x7a9, 0}, 0}, 301 {0xf029, {0x6e4, 0x6e5, 0, 0}, "tiff", {0x6e5, 0}, 0}, 302 {0xf02a, {0x46a, 0x46b, 0x6e2, 0x6e3}, "jpeg", {0x46b, 0x6e3}, 0}, 303 {0} 304 }; 305 306 static void extract(struct RecordHeader *h, struct in *in) { 307 static int img = 0; 308 for(int i = 0; OfficeArtBlip_config[i].recType; i++) { 309 const struct OfficeArtBlip_config *c = &OfficeArtBlip_config[i]; 310 if(h->recType == c->recType) { 311 char filename[PATH_MAX]; 312 snprintf(filename, PATH_MAX, "%d.%s", img++, c->ext); 313 size_t n = 0; 314 byte guid[16]; 315 n += read_guid(in, guid); 316 if(h->recInstance == c->guid2[0] 317 || (c->guid2[1] && h->recInstance == c->guid2[1])) 318 n += read_guid(in, guid); 319 if(c->metafileHeader) { 320 struct OfficeArtMetafileHeader h2; 321 n += read_OfficeArtMetafileHeader(in, &h2); 322 } else { 323 byte b; 324 n += in_read(&b, 1, 1, in); 325 } 326 FILE *f = fopen(filename, "w"); 327 cat(in, f, h->recLen - n); 328 fclose(f); 329 } 330 } 331 } 332 333 static void out(struct RecordHeader *h, struct in *in, int level, int i, int xlevel, int xi, enum cmd c) { 334 switch(c) { 335 case CMD_LS: 336 printf("%4d %4d 0x%04x 0x%04x 0x%04x %10u\n", 337 level, i, h->recVer, h->recInstance, h->recType, h->recLen); 338 break; 339 case CMD_CAT: 340 if(level == xlevel && i == xi) { 341 if(1 != write_RecordHeader(h)) { 342 fprintf(stderr, "output failed\n"); 343 exit(1); 344 } 345 cat(in, stdout, h->recLen); 346 exit(0); 347 } 348 case CMD_TXT: txt(h, in); break; 349 case CMD_HTML: html(h, in); break; 350 case CMD_EXTRACT: extract(h, in); break; 351 } 352 } 353 354 static void walk(struct in *in, int level, dword pos, int xlevel, int xi, enum cmd cmd) { 355 assert(0 <= level); 356 assert(0 <= pos); 357 for(int i = 0;; i++) { 358 if(0 < pos && pos <= in_tell(in)) 359 break; 360 struct RecordHeader h; 361 size_t n = read_RecordHeader(in, &h); 362 if(n <= 0) { 363 break; // EOF 364 } 365 if(1 != n) { 366 fprintf(stderr, "error reading record header\n"); 367 exit(1); 368 } 369 size_t start = in_tell(in), end = start + h.recLen; 370 out(&h, in, level, i, xlevel, xi, cmd); 371 if(0xf == h.recVer) { 372 if(0 < pos) 373 end = end < pos ? end : pos; 374 walk(in, 1 + level, end, xlevel, xi, cmd); 375 } else 376 in_seek(in, end); 377 } 378 // TODO xlevel xi not found -> exit(1) 379 } 380 381 static int cmd_ls(char *argv[]) { 382 char *filename = argv[0]; 383 struct in in; 384 in_open(&in, filename); 385 walk(&in, 0, 0, -1, -1, CMD_LS); 386 in_close(&in); 387 return 0; 388 } 389 390 static int cmd_cat(char *argv[]) { 391 char *level = argv[0]; 392 if(!level) { 393 fprintf(stderr, "level expected\n"); 394 return 1; 395 } 396 int xlevel; 397 if(1 != sscanf(level, "%d", &xlevel)) { 398 fprintf(stderr, "unknown level '%s'\n", level); 399 return 1; 400 } 401 char *i = argv[1]; 402 if(!i) { 403 fprintf(stderr, "i expected\n"); 404 return 1; 405 } 406 int xi; 407 if(1 != sscanf(i, "%d", &xi)) { 408 fprintf(stderr, "unknown i '%s'\n", i); 409 return 1; 410 } 411 char *filename = argv[2]; 412 struct in in; 413 in_open(&in, filename); 414 walk(&in, 0, 0, xlevel, xi, CMD_CAT); 415 in_close(&in); 416 return 0; 417 } 418 419 static int cmd_txt(char *argv[]) { 420 char *filename = argv[0]; 421 struct in in; 422 in_open(&in, filename); 423 walk(&in, 0, 0, -1, -1, CMD_TXT); 424 in_close(&in); 425 return 0; 426 } 427 428 static int cmd_html(char *argv[]) { 429 char *filename = argv[0]; 430 struct in in; 431 in_open(&in, filename); 432 walk(&in, 0, 0, -1, -1, CMD_HTML); 433 in_close(&in); 434 return 0; 435 } 436 437 static int cmd_extract(char *argv[]) { 438 char *filename = argv[0]; 439 struct in in; 440 in_open(&in, filename); 441 walk(&in, 0, 0, -1, -1, CMD_EXTRACT); 442 in_close(&in); 443 return 0; 444 } 445 446 static int cmd_help(void) { 447 printf("usage:\n"); 448 printf(" ppt ls [filename] list records\n"); 449 printf(" ppt cat level index [filename] print record\n"); 450 printf(" ppt txt [filename] print text\n"); 451 printf(" ppt html [filename] print html\n"); 452 printf(" ppt extract [filename] extract pictures\n"); 453 printf(" ppt --help print help\n"); 454 printf(" ppt --version print version\n"); 455 return 0; 456 } 457 458 static int cmd_version(void) { 459 printf("%s\n", VERSION); 460 return 0; 461 } 462 463 int main(int argc, char *argv[]) { 464 char *cmd = *++argv; 465 if(!cmd) { 466 fprintf(stderr, "command expected\n"); 467 return 1; 468 } 469 ++argv; 470 if(!strcmp("ls", cmd)) return cmd_ls(argv); 471 else if(!strcmp("cat", cmd)) return cmd_cat(argv); 472 else if(!strcmp("txt", cmd)) return cmd_txt(argv); 473 else if(!strcmp("html", cmd)) return cmd_html(argv); 474 else if(!strcmp("extract", cmd)) return cmd_extract(argv); 475 else if(!strcmp("--help", cmd)) return cmd_help(); 476 else if(!strcmp("--version", cmd)) return cmd_version(); 477 else fprintf(stderr, "unexpected command %s\n", cmd); 478 return 1; 479 }