olefs

command line tools to extract data from OLE documents like doc, ppt, xls, msg
git clone https://logand.com/git/olefs.git/
Log | Files | Refs

commit a9018212f9f623c9e1cf5b43b85b792894e7b193
parent 4bf4078f1f14376874ae6804e5cadc62925e3001
Author: Tomas Hlavaty <tom@logand.com>
Date:   Tue,  9 Jul 2019 18:00:17 +0200

remove fuse dependency

Diffstat:
M Makefile | 29 +++++----------------------
A VERSION | 1 +
A cfb.c | 512 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D cfbfs.c | 681 -------------------------------------------------------------------------------
A default.nix | 14 ++++++++++++++
D nopoint | 16 ----------------
D odrawfs.c | 312 -------------------------------------------------------------------------------
A ppt.c | 455 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D ppt2html.c | 105 -------------------------------------------------------------------------------
D pptdump.sh | 12 ------------
D pptview.sh | 4 ----
D rhdump.c | 64 ----------------------------------------------------------------
A shell.nix | 1 +
D utf8.h | 3 ---
14 files changed, 990 insertions(+), 1219 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,31 +1,16 @@ -ALL=cfbfs odrawfs rhdump ppt2html -#-std=c99 -CFLAGS=-g -Wall -#CFLAGS=-Wall -O2 +ALL=cfb ppt +CFLAGS=-Wall -Os LDFLAGS= -CFLAGSFUSE=-DFUSE_USE_VERSION=25 $(shell pkg-config fuse --cflags) -LDFLAGSFUSE=$(shell pkg-config fuse --libs) all: $(ALL) -cfbfs: cfbfs.c - $(CC) $(CFLAGS) $(CFLAGSFUSE) -o $@ $< $(LDFLAGS) $(LDFLAGSFUSE) -# strip $@ - -odrawfs: odrawfs.c - $(CC) $(CFLAGS) $(CFLAGSFUSE) -o $@ $< $(LDFLAGS) $(LDFLAGSFUSE) -# strip $@ - -rhdump: rhdump.c +cfb: cfb.c $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -# strip $@ + strip $@ -utf8.o: utf8.c utf8.h - $(CC) $(CFLAGS) -o $@ -c $< - -ppt2html: ppt2html.c utf8.o - $(CC) $(CFLAGS) -o $@ $< utf8.o $(LDFLAGS) -# strip $@ +ppt: ppt.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + strip $@ clean: rm -f $(ALL) diff --git a/VERSION b/VERSION @@ -0,0 +1 @@ +"0.1" diff --git a/cfb.c b/cfb.c @@ -0,0 +1,512 @@ +// TODO version 4 with bigger sector size + +const char *VERSION = +#include "VERSION" + ; + +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdint.h> +//#include <iconv.h> // TODO wchar -> utf8 properly + +// MS-CFB Compound File Binary File Format + +#define UNUSED_SECTOR 0 +#define MAXREGSECT 0xfffffffa +#define DIFSECT 0xfffffffc +#define FATSECT 0xfffffffd +#define ENDOFCHAIN 0xfffffffe +#define FREESECT 0xffffffff + +#define MAXREGSIG 0xfffffffa +#define NOSTREAM 0xffffffff + +#define ENTRY_UNKNOWN 0 +#define ENTRY_STORAGE 1 +#define ENTRY_STREAM 2 +#define ENTRY_ROOT 5 + +typedef uint8_t byte; +typedef uint16_t ushort; +typedef uint16_t wchar; +typedef uint32_t dword; +typedef uint64_t filetime; +typedef uint64_t ulonglong; + +struct entry { + wchar name[32]; + ushort name_length; + byte object_type; + byte color_flag; + dword left_sibling_id; + dword right_sibling_id; + dword child_id; + byte clsid[16]; + dword state_bits; + filetime creation_time; + filetime 
modified_time; + dword starting_sector_location; + ulonglong stream_size; +}; + +struct chain { + dword location; + struct chain *next; +}; + +struct header { + byte signature[8]; + byte clsid[16]; + ushort minor_version; + ushort major_version; + ushort byte_order; + ushort sector_shift; + ushort mini_sector_shift; + byte reserved[6]; + dword number_of_directory_sectors; + dword number_of_fat_sectors; + dword first_directory_sector_location; + dword transaction_signature_number; + dword mini_stream_cutoff_size; + dword first_mini_fat_sector_location; + dword number_of_mini_fat_sectors; + dword first_difat_sector_location; + dword number_of_difat_sectors; +}; + +struct cfb_file { + FILE *stream; + //static iconv_t conv; + struct header header; + dword difat_length; + dword *difat; + dword fat_length; + dword *fat; + struct chain *directory_chain; + int directories_length; + struct entry *directories; + struct chain *mfat_chain; + int mfat_length; + dword *mfat; +}; + +static void seek_sector(FILE *stream, dword location) { + fseek(stream, (1 + location) * 512, SEEK_SET); +} + +static void read_byte(FILE *stream, byte *place, int count) { + size_t n = fread(place, sizeof(byte), count, stream); + assert(n == count); +} + +static void read_wchar(FILE *stream, wchar *place, int count) { + size_t n = fread(place, sizeof(wchar), count, stream); + assert(n == count); +} + +static void read_ushort(FILE *stream, ushort *place, int count) { + size_t n = fread(place, sizeof(ushort), count, stream); + assert(n == count); +} + +static void read_dword(FILE *stream, dword *place, int count) { + size_t n = fread(place, sizeof(dword), count, stream); + assert(n == count); +} + +static void read_filetime(FILE *stream, filetime *place, int count) { + size_t n = fread(place, sizeof(filetime), count, stream); + assert(n == count); +} + +static void read_ulonglong(FILE *stream, ulonglong *place, int count) { + size_t n = fread(place, sizeof(ulonglong), count, stream); + assert(n == 
count); +} + +static void read_header(FILE *stream, struct header *x) { + read_byte(stream, x->signature, 8); + read_byte(stream, x->clsid, 16); + read_ushort(stream, &x->minor_version, 1); + read_ushort(stream, &x->major_version, 1); + read_ushort(stream, &x->byte_order, 1); + read_ushort(stream, &x->sector_shift, 1); + read_ushort(stream, &x->mini_sector_shift, 1); + read_byte(stream, x->reserved, 6); + read_dword(stream, &x->number_of_directory_sectors, 1); + read_dword(stream, &x->number_of_fat_sectors, 1); + read_dword(stream, &x->first_directory_sector_location, 1); + read_dword(stream, &x->transaction_signature_number, 1); + read_dword(stream, &x->mini_stream_cutoff_size, 1); + read_dword(stream, &x->first_mini_fat_sector_location, 1); + read_dword(stream, &x->number_of_mini_fat_sectors, 1); + read_dword(stream, &x->first_difat_sector_location, 1); + read_dword(stream, &x->number_of_difat_sectors, 1); +} + +static void check_header(struct header *x) { + static const byte expected_signature[8] = + {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1}; + static const byte expected_reserved[6] = {0, 0, 0, 0, 0, 0}; + assert(!memcmp(x->signature, expected_signature, 8)); + /* ;;(assert (equalp clsid_null (ole_header.clsid x))) */ + assert(0xfffe == x->byte_order); + assert(!memcmp(x->reserved, expected_reserved, 6)); + assert(3 == x->major_version); + assert(512 == (1 << x->sector_shift)); + assert(64 == (1 << x->mini_sector_shift)); + assert(0 == x->number_of_directory_sectors); + /* ;;(assert (eql 0xfffffffe (first_directory_sector_location x))) */ + assert(0 == x->transaction_signature_number); + assert(4096 == x->mini_stream_cutoff_size); + /* ;;(assert (eql 0xfffffffe (first_mini_fat_sector_location x))) */ + if(x->number_of_difat_sectors <= 0) + assert(0xfffffffe == x->first_difat_sector_location); +} + +static void read_entry(FILE *stream, struct entry *entry) { + read_wchar(stream, &entry->name[0], 32); + read_ushort(stream, &entry->name_length, 1); + 
read_byte(stream, &entry->object_type, 1); + read_byte(stream, &entry->color_flag, 1); + read_dword(stream, &entry->left_sibling_id, 1); + read_dword(stream, &entry->right_sibling_id, 1); + read_dword(stream, &entry->child_id, 1); + read_byte(stream, &entry->clsid[0], 16); + read_dword(stream, &entry->state_bits, 1); + read_filetime(stream, &entry->creation_time, 1); + read_filetime(stream, &entry->modified_time, 1); + read_dword(stream, &entry->starting_sector_location, 1); + read_ulonglong(stream, &entry->stream_size, 1); +}; + +static void print_bytes(byte *place, int count) { + int i; + for(i = 0; i < count; i++) { + printf("%s%02x", (0 < i ? ":" : ""), place[i]); + } +} + +static void print_ushort(ushort x) { + printf("%u 0x%04x", x, x); +} + +static void print_dword(dword x) { + printf("%u 0x%08x", x, x); +} + +static void read_difat (FILE *stream, struct cfb_file *x) { + x->difat_length = 109 + ((512 - 4) / 4) * x->header.number_of_difat_sectors; + x->difat = calloc(x->difat_length, sizeof(dword)); + read_dword(stream, x->difat, 109); + dword n = x->header.first_difat_sector_location, i = 109, m = 512 / 4 - 1; + for(; n != ENDOFCHAIN; read_dword(stream, &n, 1), i += m) { + seek_sector(stream, n); + read_dword(stream, &x->difat[i], m); + } +} + +static void read_fat (FILE *stream, struct cfb_file *x) { + dword m = 512 / 4; + x->fat_length = m * x->difat_length; + x->fat = calloc(x->fat_length, sizeof(dword)); + int i; + for(i = 0; i < x->difat_length; i++) { + dword s = x->difat[i]; + if(s != FREESECT) { + seek_sector(stream, s); + read_dword(stream, &x->fat[i * m], m); + } + } +} + +static struct chain *make_chain(dword location, struct chain *next) { + struct chain *x = malloc(sizeof(struct chain)); + x->location = location; + x->next = next; + return x; +} + +static int chain_length(struct chain *x) { + int i = 0; + for(; x; x = x->next) i++; + return i; +} + +static struct chain *nth_chain(struct chain *chain, int n) { + if(0 <= n) { + for(; chain; chain 
= chain->next) { + if(n <= 0) + return chain; + n--; + } + } + return NULL; +} + +static struct chain *sector_chain(dword *fat, dword location) { + struct chain *x = NULL; + switch(location) { + case DIFSECT: + case FATSECT: + case ENDOFCHAIN: + case FREESECT: + break; + default: + assert(0 <= location && location <= MAXREGSECT); + x = make_chain(location, sector_chain(fat, fat[location])); + } + return x; +} + +static void read_directories (FILE *stream, struct cfb_file *x) { + dword m = 512 / 128; + x->directories_length = m * chain_length(x->directory_chain); + x->directories = calloc(x->directories_length, sizeof(struct entry)); + int i = 0; + for(struct chain *c = x->directory_chain; c; c = c->next) { + seek_sector(stream, c->location); + int j; + for(j = 0; j < m; j++) + read_entry(stream, &x->directories[i++]); + } +} + +static void read_mfat (FILE *stream, struct cfb_file *x) { + dword m = 512 / 4; + x->mfat_length = m * chain_length(x->mfat_chain); + x->mfat = calloc(x->mfat_length, sizeof(dword)); + int i = 0; + for(struct chain *c = x->mfat_chain; c; c = c->next) { + seek_sector(stream, c->location); + read_dword(stream, &x->mfat[i++ * m], m); + } +} + +static void open_cfb_file(char *filename, struct cfb_file *x) { + FILE *stream = fopen(filename, "r"); + if(!stream) { + fprintf(stderr, "Unable to open '%s'.\n", filename); + exit(1); + } + //conv = iconv_open("UTF-8", "UTF-16LE"); //"UCS-2"); //"UCS2-LE"); + x->stream = stream; + read_header(stream, &x->header); + check_header(&x->header); + read_difat(stream, x); + read_fat(stream, x); + x->directory_chain = sector_chain(x->fat, x->header.first_directory_sector_location); + read_directories(stream, x); + x->mfat_chain = sector_chain(x->fat, x->header.first_mini_fat_sector_location); + read_mfat(stream, x); +} + +static void print_cfb_file(struct cfb_file *x) { + printf("signature "); + print_bytes(x->header.signature, 8); + printf("\nclsid "); + print_bytes(x->header.clsid, 16); + 
printf("\nminor_version "); + print_ushort(x->header.minor_version); + printf("\nmajor_version "); + print_ushort(x->header.major_version); + printf("\nbyte_order "); + print_ushort(x->header.byte_order); + printf("\nsector_shift "); + print_ushort(x->header.sector_shift); + printf("\nmini_sector_shift "); + print_ushort(x->header.mini_sector_shift); + printf("\nreserved "); + print_bytes(x->header.reserved, 6); + printf("\nnumber_of_directory_sectors "); + print_dword(x->header.number_of_directory_sectors); + printf("\nnumber_of_fat_sectors "); + print_dword(x->header.number_of_fat_sectors); + printf("\nfirst_directory_sector_location "); + print_dword(x->header.first_directory_sector_location); + printf("\ntransaction_signature_number "); + print_dword(x->header.transaction_signature_number); + printf("\nmini_stream_cutoff_size "); + print_dword(x->header.mini_stream_cutoff_size); + printf("\nfirst_mini_fat_sector_location "); + print_dword(x->header.first_mini_fat_sector_location); + printf("\nnumber_of_mini_fat_sectors "); + print_dword(x->header.number_of_mini_fat_sectors); + printf("\nfirst_difat_sector_location "); + print_dword(x->header.first_difat_sector_location); + printf("\nnumber_of_difat_sectors "); + print_dword(x->header.number_of_difat_sectors); + //printf("\n"); + printf("\ndifat_length "); + print_dword(x->difat_length); + //dword *difat; + printf("\nfat_length "); + print_dword(x->fat_length); + //dword *fat; + //struct chain *directory_chain; + printf("\ndirectories_length %d", x->directories_length); + //struct entry *directories; + //struct chain *mfat_chain; + printf("\nmfat_length %d", x->mfat_length); + //dword *mfat; + printf("\n"); +} + +static void cat(struct entry *e, struct cfb_file *f) { + int mini = e->stream_size < f->header.mini_stream_cutoff_size; + struct chain *chain = sector_chain(f->fat, (mini ? f->directories : e)->starting_sector_location); + struct chain *mchain = mini ? 
sector_chain(f->mfat, e->starting_sector_location) : NULL; + dword sector = -1; + byte buffer[512]; + for(int i = 0; i < e->stream_size; i++) { + dword rr; + dword q; + if(mchain) { + dword mq = i / 64; + dword mr = i % 64; + dword s = nth_chain(mchain, mq)->location; + q = s / (512 / 64); + dword r = s % (512 / 64); + rr = (64 * r) + mr; + } else { + q = i / 512; + dword r = i % 512; + rr = r; + } + if(sector != q) { + seek_sector(f->stream, nth_chain(chain, q)->location); + size_t n = fread(buffer, sizeof(byte), 512, f->stream); + assert(512 == n); + sector = q; + } + fwrite(&buffer[rr], 1, 1, stdout); + } +} + +static size_t xconv(wchar *iname, char *oname, size_t length) { + /* size_t ileft = length, oleft; */ + /* return iconv(conv, (char **) &iname, &ileft, &oname, &oleft); */ + int i; + for(i = 0; i < length / sizeof(wchar); i++) + oname[i] = iname[i]; + return 0; +} + +static int walk(struct cfb_file *f, char *path, dword id, char *parent) { + struct entry *e = &f->directories[id]; + if(e->object_type == ENTRY_STORAGE + || e->object_type == ENTRY_STREAM + || e->object_type == ENTRY_ROOT) { + char name[32 * sizeof(wchar)]; + xconv(e->name, name, e->name_length); + size_t len = strlen(parent) + 1 + strlen(name) + 1; + char child[len]; + snprintf(child, len, "%s/%s", parent, name); + if(path) { + if(!strcmp(path, child)) { + cat(e, f); + return 0; + } + } else { + if(e->object_type == ENTRY_STREAM) { + printf("f %10lu %s\n", e->stream_size, child); + } else { + printf("d %10u %s\n", 0, child); + } + } + dword n1 = e->left_sibling_id; + if(n1 <= MAXREGSIG) + if(!walk(f, path, n1, parent)) + return 0; + dword n2 = e->child_id; + if(n2 <= MAXREGSIG) + if(!walk(f, path, n2, child)) + return 0; + dword n3 = e->right_sibling_id; + if(n3 <= MAXREGSIG) + if(!walk(f, path, n3, parent)) + return 0; + } + return 1; +} + +static void usage(FILE *stream) { + fprintf(stream, "Usage:\n"); + fprintf(stream, " cfb ls filename list files\n"); + fprintf(stream, " cfb cat 
filename path write file to stdout\n"); + fprintf(stream, " cfb info filename print info\n"); + fprintf(stream, " cfb --help print help\n"); + fprintf(stream, " cfb --version print version\n"); +} + +static int cmd_ls(char *argv[]) { + char *filename = argv[0]; + if(!filename) { + usage(stderr); + return 1; + } + struct cfb_file cfb_file; + open_cfb_file(filename, &cfb_file); + walk(&cfb_file, NULL, 0, ""); + return 0; +} + +static int cmd_cat(char *argv[]) { + char *filename = argv[0]; + if(!filename) { + usage(stderr); + return 1; + } + char *path = argv[1]; + if(!path) { + usage(stderr); + return 1; + } + struct cfb_file cfb_file; + open_cfb_file(filename, &cfb_file); + walk(&cfb_file, path, 0, ""); + return 0; +} + +static int cmd_info(char *argv[]) { + char *filename = argv[0]; + if(!filename) { + usage(stderr); + return 1; + } + struct cfb_file cfb_file; + open_cfb_file(filename, &cfb_file); + print_cfb_file(&cfb_file); + return 0; +} + +static int cmd_help(void) { + usage(stdout); + return 0; +} + +static int cmd_version(void) { + printf("%s\n", VERSION); + return 0; +} + +int main(int argc, char **argv) { + char *cmd = *++argv; + if(!cmd) { + usage(stderr); + return 1; + } + ++argv; + if(!strcmp("ls", cmd)) return cmd_ls(argv); + else if(!strcmp("cat", cmd)) return cmd_cat(argv); + else if(!strcmp("info", cmd)) return cmd_info(argv); + else if(!strcmp("--help", cmd)) return cmd_help(); + else if(!strcmp("--version", cmd)) return cmd_version(); + else usage(stderr); + return 1; +} diff --git a/cfbfs.c b/cfbfs.c @@ -1,681 +0,0 @@ -// TODO version 4 with bigger sector size - -#include <fuse.h> -#include <stdlib.h> -#include <assert.h> // TODO dont use assert! 
-#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <fcntl.h> -#include <unistd.h> -//#include <iconv.h> // TODO wchar -> utf8 properly - -// MS-CFB Compound File Binary File Format - -#define UNUSED_SECTOR 0 -#define MAXREGSECT 0xfffffffa -#define DIFSECT 0xfffffffc -#define FATSECT 0xfffffffd -#define ENDOFCHAIN 0xfffffffe -#define FREESECT 0xffffffff - -#define MAXREGSIG 0xfffffffa -#define NOSTREAM 0xffffffff - -#define ENTRY_UNKNOWN 0 -#define ENTRY_STORAGE 1 -#define ENTRY_STREAM 2 -#define ENTRY_ROOT 5 - -typedef uint8_t byte; -typedef uint16_t ushort; -typedef uint16_t wchar; -typedef uint32_t dword; -typedef uint64_t filetime; -typedef uint64_t ulonglong; - -struct entry { - wchar name[32]; - ushort name_length; - byte object_type; - byte color_flag; - dword left_sibling_id; - dword right_sibling_id; - dword child_id; - byte clsid[16]; - dword state_bits; - filetime creation_time; - filetime modified_time; - dword starting_sector_location; - ulonglong stream_size; -}; - -struct chain { - dword location; - struct chain *next; -}; - -#define PATH_MAX 1024 - -static char *filename; -static char cwd[PATH_MAX]; // fuse changes cwd:-{ -//static iconv_t conv; -static dword difat_length; -static dword *difat; -static dword fat_length; -static dword *fat; -static struct chain *directory_chain; -static int directories_length; -static struct entry *directories; -static struct chain *mfat_chain; -static int mfat_length; -static dword *mfat; - -// header -static byte signature[8]; -static byte clsid[16]; -static ushort minor_version; -static ushort major_version; -static ushort byte_order; -static ushort sector_shift; -static ushort mini_sector_shift; -static byte reserved[6]; -static dword number_of_directory_sectors; -static dword number_of_fat_sectors; -static dword first_directory_sector_location; -static dword transaction_signature_number; -static dword mini_stream_cutoff_size; -static dword first_mini_fat_sector_location; -static dword 
number_of_mini_fat_sectors; -static dword first_difat_sector_location; -static dword number_of_difat_sectors; - -static dword location_position(dword location) { - return (1 + location) * 512; -} - -static void seek_sector(FILE *stream, dword location) { - dword position = location_position(location); - fseek(stream, position, SEEK_SET); -} - -static void read_byte(FILE *stream, byte *place, int count) { - fread(place, sizeof(byte), count, stream); -} - -static void read_wchar(FILE *stream, wchar *place, int count) { - fread(place, sizeof(wchar), count, stream); -} - -static void read_ushort(FILE *stream, ushort *place, int count) { - fread(place, sizeof(ushort), count, stream); -} - -static void read_dword(FILE *stream, dword *place, int count) { - fread(place, sizeof(dword), count, stream); -} - -static void read_filetime(FILE *stream, filetime *place, int count) { - fread(place, sizeof(filetime), count, stream); -} - -static void read_ulonglong(FILE *stream, ulonglong *place, int count) { - fread(place, sizeof(ulonglong), count, stream); -} - -static void read_header(FILE *stream) { - read_byte(stream, signature, 8); - read_byte(stream, clsid, 16); - read_ushort(stream, &minor_version, 1); - read_ushort(stream, &major_version, 1); - read_ushort(stream, &byte_order, 1); - read_ushort(stream, &sector_shift, 1); - read_ushort(stream, &mini_sector_shift, 1); - read_byte(stream, reserved, 6); - read_dword(stream, &number_of_directory_sectors, 1); - read_dword(stream, &number_of_fat_sectors, 1); - read_dword(stream, &first_directory_sector_location, 1); - read_dword(stream, &transaction_signature_number, 1); - read_dword(stream, &mini_stream_cutoff_size, 1); - read_dword(stream, &first_mini_fat_sector_location, 1); - read_dword(stream, &number_of_mini_fat_sectors, 1); - read_dword(stream, &first_difat_sector_location, 1); - read_dword(stream, &number_of_difat_sectors, 1); -} - -static const byte expected_signature[8] = - {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 
0xe1}; - -static const byte expected_reserved[6] = {0, 0, 0, 0, 0, 0}; - -static void check_header() { - assert(!memcmp(signature, expected_signature, 8)); -/* ;;(assert (equalp clsid_null (ole_header.clsid x))) */ - assert(0xfffe == byte_order); - assert(!memcmp(reserved, expected_reserved, 6)); - assert(3 == major_version); - assert(512 == (1 << sector_shift)); - assert(64 == (1 << mini_sector_shift)); - assert(0 == number_of_directory_sectors); - /* ;;(assert (eql 0xfffffffe (first_directory_sector_location x))) */ - assert(0 == transaction_signature_number); - assert(4096 == mini_stream_cutoff_size); - /* ;;(assert (eql 0xfffffffe (first_mini_fat_sector_location x))) */ - if(number_of_difat_sectors <= 0) - assert(0xfffffffe == first_difat_sector_location); -} - -static void read_entry(FILE *stream, struct entry *entry) { - read_wchar(stream, &entry->name[0], 32); - read_ushort(stream, &entry->name_length, 1); - read_byte(stream, &entry->object_type, 1); - read_byte(stream, &entry->color_flag, 1); - read_dword(stream, &entry->left_sibling_id, 1); - read_dword(stream, &entry->right_sibling_id, 1); - read_dword(stream, &entry->child_id, 1); - read_byte(stream, &entry->clsid[0], 16); - read_dword(stream, &entry->state_bits, 1); - read_filetime(stream, &entry->creation_time, 1); - read_filetime(stream, &entry->modified_time, 1); - read_dword(stream, &entry->starting_sector_location, 1); - read_ulonglong(stream, &entry->stream_size, 1); -}; - -static void print_bytes(FILE *stream, byte *place, int count) { - int i; - for(i = 0; i < count; i++) { - fprintf(stream, "%s%02x", (0 < i ? 
":" : ""), place[i]); - } -} - -static void print_ushort(FILE *stream, ushort x) { - fprintf(stream, "%u 0x%04x", x, x); -} - -static void print_dword(FILE *stream, dword x) { - fprintf(stream, "%u 0x%08x", x, x); -} - -static void print_header(FILE *stream) { - fprintf(stream, "signature "); - print_bytes(stream, signature, 8); - fprintf(stream, "\nclsid "); - print_bytes(stream, clsid, 16); - fprintf(stream, "\nminor_version "); - print_ushort(stream, minor_version); - fprintf(stream, "\nmajor_version "); - print_ushort(stream, major_version); - fprintf(stream, "\nbyte_order "); - print_ushort(stream, byte_order); - fprintf(stream, "\nsector_shift "); - print_ushort(stream, sector_shift); - fprintf(stream, "\nmini_sector_shift "); - print_ushort(stream, mini_sector_shift); - fprintf(stream, "\nreserved "); - print_bytes(stream, reserved, 6); - fprintf(stream, "\nnumber_of_directory_sectors "); - print_dword(stream, number_of_directory_sectors); - fprintf(stream, "\nnumber_of_fat_sectors "); - print_dword(stream, number_of_fat_sectors); - fprintf(stream, "\nfirst_directory_sector_location "); - print_dword(stream, first_directory_sector_location); - fprintf(stream, "\ntransaction_signature_number "); - print_dword(stream, transaction_signature_number); - fprintf(stream, "\nmini_stream_cutoff_size "); - print_dword(stream, mini_stream_cutoff_size); - fprintf(stream, "\nfirst_mini_fat_sector_location "); - print_dword(stream, first_mini_fat_sector_location); - fprintf(stream, "\nnumber_of_mini_fat_sectors "); - print_dword(stream, number_of_mini_fat_sectors); - fprintf(stream, "\nfirst_difat_sector_location "); - print_dword(stream, first_difat_sector_location); - fprintf(stream, "\nnumber_of_difat_sectors "); - print_dword(stream, number_of_difat_sectors); - //fprintf(stream, "\n"); - fprintf(stream, "\nfilename %s", filename); - fprintf(stream, "\ndirectory %s", cwd); - fprintf(stream, "\ndifat_length "); - print_dword(stream, difat_length); - fprintf(stream, 
"\nfat_length "); - print_dword(stream, fat_length); - fprintf(stream, "\ndirectories_length %d", directories_length); - fprintf(stream, "\nmfat_length %d", mfat_length); - fprintf(stream, "\n"); -} - -static void read_difat (FILE *stream) { - difat_length = 109 + ((512 - 4) / 4) * number_of_difat_sectors; - difat = calloc(difat_length, sizeof(dword)); - read_dword(stream, difat, 109); - dword n = first_difat_sector_location, i = 109, m = 512 / 4 - 1; - for(; n != ENDOFCHAIN; read_dword(stream, &n, 1), i += m) { - seek_sector(stream, n); - read_dword(stream, &difat[i], m); - } -} - -static void read_fat (FILE *stream) { - dword m = 512 / 4; - fat_length = m * difat_length; - fat = calloc(fat_length, sizeof(dword)); - int i; - for(i = 0; i < difat_length; i++) { - dword s = difat[i]; - if(s != FREESECT) { - seek_sector(stream, s); - read_dword(stream, &fat[i * m], m); - } - } -} - -static struct chain *make_chain(dword location, struct chain *next) { - struct chain *x = malloc(sizeof(struct chain)); - x->location = location; - x->next = next; - return x; -} - -#define FOREACH_CHAIN(chain) for(; chain; chain = chain->next) - -static void free_chain(struct chain *x) { - struct chain *next; - for(; x; x = next) { - next = x->next; - free(x); - } -} - -static int chain_length(struct chain *chain) { - int i = 0; - FOREACH_CHAIN(chain) i++; - return i; -} - -static struct chain *nth_chain(struct chain *chain, int n) { - if(0 <= n) { - FOREACH_CHAIN(chain) { - if(n == 0) - return chain; - n--; - } - } - return NULL; -} - -static struct chain *sector_chain(dword *fat, dword location) { - struct chain *x = NULL; - switch(location) { - case DIFSECT: - case FATSECT: - case ENDOFCHAIN: - case FREESECT: - break; - default: - assert(0 <= location && location <= MAXREGSECT); - x = make_chain(location, sector_chain(fat, fat[location])); - } - return x; -} - -static void read_directories (FILE *stream) { - dword m = 512 / 128; - directories_length = m * 
chain_length(directory_chain); - directories = calloc(directories_length, sizeof(struct entry)); - int i = 0; - struct chain *x = directory_chain; - FOREACH_CHAIN(x) { - seek_sector(stream, x->location); - int j; - for(j = 0; j < m; j++) - read_entry(stream, &directories[i++]); - } -} - -static void read_mfat (FILE *stream) { - dword m = 512 / 4; - mfat_length = m * chain_length(mfat_chain); - mfat = calloc(mfat_length, sizeof(dword)); - int i = 0; - struct chain *x = mfat_chain; - FOREACH_CHAIN(x) { - seek_sector(stream, x->location); - read_dword(stream, &mfat[i++ * m], m); - } -} - -static const char *header_path = "/header"; - -static void print_header_to_string(char **str, size_t *size) { - FILE *stream = open_memstream(str, size); - print_header(stream); - fflush(stream); - fclose(stream); -} - -static size_t xconv(wchar *iname, char *oname, size_t length) { - /* size_t ileft = length, oleft; */ - /* return iconv(conv, (char **) &iname, &ileft, &oname, &oleft); */ - int i; - for(i = 0; i < length / sizeof(wchar); i++) - oname[i] = iname[i]; - return 0; -} - -typedef int (*walk_directory_cb)(void *env, struct entry *entry, dword id, - char *path, char *name, char *parent_path); - -static void walk_directory(void *env, walk_directory_cb cb, dword id, char *path) { - struct entry *x = &directories[id]; - if(x->object_type == ENTRY_STORAGE - || x->object_type == ENTRY_STREAM - || x->object_type == ENTRY_ROOT) { - char name[32 * sizeof(wchar)]; - xconv(x->name, name, x->name_length); - size_t xlen = strlen(path) + 1 + strlen(name) + 1; - char xpath[xlen]; - snprintf(xpath, xlen, "%s/%s", path, name); - int descend = cb(env, x, id, xpath, name, path); - dword n; - n = x->left_sibling_id; - if(n <= MAXREGSIG) - walk_directory(env, cb, n, path); - if(descend) { - n = x->child_id; - if(n <= MAXREGSIG) - walk_directory(env, cb, n, xpath); - } - n = x->right_sibling_id; - if(n <= MAXREGSIG) - walk_directory(env, cb, n, path); - } -} - -struct getattr_walk { - const char 
*path; - struct stat *stbuf; - int result; -}; - -static int getattr_walk_cb(void *env, struct entry *entry, dword id, - char *path, char *name, char *parent_path) { - struct getattr_walk *e = env; - if(!strcmp(path, e->path)) { - switch(entry->object_type) { - case ENTRY_STORAGE: - e->stbuf->st_mode = S_IFDIR | 0755; - e->stbuf->st_nlink = 1; - e->stbuf->st_size = 0; - e->result = 0; - return 0; - case ENTRY_STREAM: - e->stbuf->st_mode = S_IFREG | 0444; - e->stbuf->st_nlink = 1; - e->stbuf->st_size = entry->stream_size; - e->result = 0; - return 0; - case ENTRY_ROOT: - e->stbuf->st_mode = S_IFDIR | 0755; - e->stbuf->st_nlink = 1; - e->stbuf->st_size = 0; - e->result = 0; - return 0; - } - } - return 1; -} - -static int olefs_getattr(const char *path, struct stat *stbuf) { - int res = 0; - memset(stbuf, 0, sizeof(struct stat)); - if(strcmp(path, "/") == 0) { - stbuf->st_mode = S_IFDIR | 0755; - stbuf->st_nlink = 2; - } - else if(strcmp(path, header_path) == 0) { - stbuf->st_mode = S_IFREG | 0444; - stbuf->st_nlink = 1; - size_t len; - char *header_str; - print_header_to_string(&header_str, &len); - free(header_str); - stbuf->st_size = len; //0; //strlen(header_str); - } - else { - struct getattr_walk e = {path, stbuf, -ENOENT}; - walk_directory(&e, getattr_walk_cb, 0, ""); - return e.result; - } - return res; -} - -struct readdir_walk { - const char *path; - void *buf; - fuse_fill_dir_t filler; - int result; -}; - -static int readdir_walk_cb(void *env, struct entry *entry, dword id, - char *path, char *name, char *parent_path) { - struct readdir_walk *e = env; - if(!strcmp(parent_path, e->path)) { - e->filler(e->buf, name, NULL, 0); - e->result = 0; - return 0; - } - return 1; -} - -static int olefs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi) { - (void) offset; - (void) fi; - struct entry *root = &directories[0]; - char name[32 * sizeof(wchar)]; - xconv(root->name, name, root->name_length); - 
if(strcmp(path, "/") == 0) { - filler(buf, ".", NULL, 0); - filler(buf, "..", NULL, 0); - filler(buf, header_path + 1, NULL, 0); - filler(buf, name, NULL, 0); - } - else { - struct readdir_walk e = {path, buf, filler, -ENOENT}; - walk_directory(&e, readdir_walk_cb, 0, ""); - return e.result; - } - return 0; -} - -struct open_walk { - const char *path; - struct fuse_file_info *fi; - int result; -}; - -static int open_walk_cb(void *env, struct entry *entry, dword id, - char *path, char *name, char *parent_path) { - struct open_walk *e = env; - if(!strcmp(path, e->path)) { - if(entry->object_type == ENTRY_STREAM) - e->result = (e->fi->flags & 3) != O_RDONLY ? -EACCES : 0; - return 0; - } - return 1; -} - -static int olefs_open(const char *path, struct fuse_file_info *fi) { - if(!strcmp(path, header_path)) { - if((fi->flags & 3) != O_RDONLY) - return -EACCES; - return 0; - } - struct open_walk e = {path, fi, -ENOENT}; - walk_directory(&e, open_walk_cb, 0, ""); - return e.result; -} - -struct entry_stream { - FILE *file; - dword offset; - struct chain *chain; - struct chain *mchain; - dword sector; - char buffer[512]; - dword size; -}; - -static void open_entry_stream(struct entry_stream *s, struct entry *e, FILE *f) { - int mini = e->stream_size < mini_stream_cutoff_size; - s->file = f; - s->offset = 0; - s->chain = sector_chain(fat, (mini ? directories : e)->starting_sector_location); - s->mchain = mini ? 
sector_chain(mfat, e->starting_sector_location) : NULL; - s->sector = -1; - s->size = e->stream_size; -} - -static void close_entry_stream(struct entry_stream *s) { - //fclose(s->file); - free_chain(s->chain); - free_chain(s->mchain); -} - -static void seek_entry_stream(struct entry_stream *stream, off_t off) { - stream->offset = off; - stream->sector = -1; -} - -static void stream_read_byte_pick(struct entry_stream *x, dword q, dword r, - char *byte) { - if(x->sector != q) { - seek_sector(x->file, nth_chain(x->chain, q)->location); - int n = fread(x->buffer, sizeof(char), 512, x->file); - assert(512 == n); - x->sector = q; - } - *byte = x->buffer[r]; - x->offset++; -} - -static int stream_read_byte(struct entry_stream *x, char *byte) { - if(x->offset < x->size) { - if(x->mchain) { - dword mq = x->offset / 64, mr = x->offset % 64; - dword s = nth_chain(x->mchain, mq)->location; - dword q = s / (512 / 64), r = s % (512 / 64); - stream_read_byte_pick(x, q, (64 * r) + mr, byte); - } - else { - dword q = x->offset / 512, r = x->offset % 512; - stream_read_byte_pick(x, q, r, byte); - } - return 1; - } - return 0; -} - -static int copy_entry_data_from(char *path, struct entry *entry, - char *buf, size_t size, off_t off) { - int n = 0; - FILE *f = fopen(path, "r"); // TODO move to open_entry_stream - if(f) { - struct entry_stream s; - open_entry_stream(&s, entry, f); - seek_entry_stream(&s, off); - for(; 0 < size; size--, n++) - if(!stream_read_byte(&s, &buf[n])) // TODO read block instead of bytes - break; - close_entry_stream(&s); - fclose(f); // TODO move to close_entry_stream - } - return n; -} - -static int copy_entry_data(struct entry *entry, char *buf, size_t size, off_t off) { - int n = 0; - if(0 < size && 0 <= off && off < entry->stream_size) { - if('/' == filename[0]) - n = copy_entry_data_from(filename, entry, buf, size, off); - else { - char x[PATH_MAX]; - snprintf(x, PATH_MAX, "%s/%s", cwd, filename); - n = copy_entry_data_from(x, entry, buf, size, off); - } 
- } - return n; -} - -struct read_walk { - const char *path; - char *buf; - size_t size; - off_t offset; - struct fuse_file_info *fi; - int result; -}; - -static int read_walk_cb(void *env, struct entry *entry, dword id, - char *path, char *name, char *parent_path) { - struct read_walk *e = env; - if(!strcmp(path, e->path)) { - e->result = copy_entry_data(entry, e->buf, e->size, e->offset); - return 0; - } - return 1; -} - -static int olefs_read(const char *path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) { - if(!strcmp(path, header_path)) { - size_t len; - char *header_str; - print_header_to_string(&header_str, &len); - /* len = strlen(header_str); */ - if (offset < len) { - if (offset + size > len) - size = len - offset; - memcpy(buf, header_str + offset, size); - } else - size = 0; - free(header_str); - return size; - } - struct read_walk e = {path, buf, size, offset, fi, -ENOENT}; - walk_directory(&e, read_walk_cb, 0, ""); - return e.result; -} - -static struct fuse_operations olefs_operations = { - .readdir = olefs_readdir, - .getattr = olefs_getattr, - .open = olefs_open, - .read = olefs_read, -}; - -int main(int argc, char *argv[]) { - if(argc < 3) { - fprintf(stderr, "Usage: %s filename directory\n", argv[0]); - exit(-1); - } - filename = argv[1]; - argv++; - argc--; - getcwd(cwd, PATH_MAX); - FILE *stream = fopen(filename, "r"); - if(!stream) { - fprintf(stderr, "Unable to open '%s'.\n", filename); - exit(-1); - } - //conv = iconv_open("UTF-8", "UTF-16LE"); //"UCS-2"); //"UCS2-LE"); - read_header(stream); - check_header(); - read_difat(stream); - read_fat(stream); - directory_chain = sector_chain(fat, first_directory_sector_location); - read_directories(stream); - mfat_chain = sector_chain(fat, first_mini_fat_sector_location); - read_mfat(stream); - fclose(stream); - return fuse_main(argc, argv, &olefs_operations); -} diff --git a/default.nix b/default.nix @@ -0,0 +1,14 @@ +{ stdenv, fetchgit }: + +stdenv.mkDerivation rec { + 
version = import ./VERSION; + name = "olefs-${version}"; + src = ./.; + installPhase = '' + mkdir -p $out/bin + cp cfb ppt $out/bin + ''; + meta = { + license = stdenv.lib.licenses.gpl3Plus; + }; +} diff --git a/nopoint b/nopoint @@ -1,16 +0,0 @@ -#!/bin/sh -FILE=$1 -#file $FILE | grep CDF -DIR1=$(mktemp -d -q /tmp/nopoint-XXXXXX) -DIR2=$(mktemp -d -q /tmp/nopoint-XXXXXX) -export DIR2 -TMP=$(mktemp -q /tmp/nopoint-XXXXXX) -rm $TMP -HTML=$TMP.html -echo '<pre>' >$HTML -echo "$FILE" >>$HTML -~/bin/cfbfs "$FILE" $DIR1 2>&1 >>$HTML -~/bin/odrawfs "$DIR1/Root Entry/Pictures" $DIR2 2>&1 >>$HTML -echo '</pre>' >>$HTML -ls -v1 $DIR2 | awk '{print "<p><img src=\"" ENVIRON["DIR2"] "/" $0 "\"><p>" $0}' >>$HTML -firefox $HTML diff --git a/odrawfs.c b/odrawfs.c @@ -1,312 +0,0 @@ -// TODO proper little endian read_ - -#include <fuse.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <unistd.h> - -typedef uint8_t byte; -typedef uint32_t dword; - -#define PATH_MAX 1024 - -static char *filename; -static char cwd[PATH_MAX]; // fuse changes cwd:-{ - -struct peep_stream { - FILE *wrap; - off_t start; - size_t size; - off_t offset; -}; - -static int peep_stream_eof(struct peep_stream *s) { - return !(s->offset < s->size); -} - -static int peep_stream_read(struct peep_stream *s, void *buf, size_t size) { - int left = s->size - s->offset; - if(0 < left) { - int n = left < size ? 
left : size; - int m = fread(buf, sizeof(byte), n, s->wrap); - if(0 < m) - s->offset += m; - return m; - } - return 0; -} - -static void peep_stream_seek(struct peep_stream *s, off_t offset) { - s->offset = offset; - if(!peep_stream_eof(s)) - fseek(s->wrap, s->start + s->offset, SEEK_SET); -} - -static size_t read_guid(FILE *s, byte guid[]) { - return fread(guid, 16, 1, s); -} - -// MS-PPT PowerPoint (.ppt) Binary File Format - -struct RecordHeader { - ushort recVer: 4; //(logand #x0f %dummy1)) - ushort recInstance: 12; //(logior (ash %dummy2 4) (ash %dummy1 -4))) - ushort recType; - dword recLen; -} __attribute__((__packed__)); - -static size_t read_RecordHeader(FILE *stream, struct RecordHeader *x) { - return fread(x, sizeof(struct RecordHeader), 1, stream); -} - -// MS-ODRAW Office Drawing Binary File Format - -struct POINT { - dword x; - dword y; -} __attribute__((__packed__)); - -struct RECT { - dword left; - dword top; - dword right; - dword bottom; -} __attribute__((__packed__)); - -struct OfficeArtMetafileHeader { - dword cbSize; - struct RECT rcBounds; - struct POINT ptSize; - dword cbSave; - byte compression; // :member '(#x00 #xfe)) - byte filter; //:always #xfe)) -} __attribute__((__packed__)); - -static size_t read_OfficeArtMetafileHeader(FILE *s, - struct OfficeArtMetafileHeader *x) { - return fread(x, sizeof(struct OfficeArtMetafileHeader), 1, s); -} - -static const struct OfficeArtBlip_config { - ushort recType; - ushort recInstance[4]; - char *ext; - ushort guid2[2]; - int metafileHeader; -} OfficeArtBlip_config[] = { - {0xF01A, {0x3d4, 0x3d5}, "emf", {0x3d5}, 1}, - {0xF01B, {0x216, 0x217}, "wmf", {0x217}, 1}, - {0xF01C, {0x542, 0x543}, "pict", {0x543}, 1}, - {0xF01D, {0x46A, 0x46B, 0x6E2, 0x6E3}, "jpeg", {0x46B, 0x6E3}, 0}, - {0xF01E, {0x6e0, 0x6e1}, "png", {0x6e1}, 0}, - {0xF01F, {0x7a8, 0x7a9}, "dib", {0x7a9}, 0}, - {0xF029, {0x6e4, 0x6e5}, "tiff", {0x6e5}, 0}, - {0xF02A, {0x46A, 0x46B, 0x6E2, 0x6E3}, "jpeg", {0x46B, 0x6E3}, 0}, -}; - -static 
int member(ushort x, const ushort a[]) { - int i; - for(i = 0; i < sizeof(a); i++) - if(a[i] == x) - return 1; - return 0; -} - -struct header { - struct RecordHeader h; - int i; - off_t fpos; - struct header *next; -}; - -static struct header *headers; - -static const struct OfficeArtBlip_config *header_config(struct header *h) { - int i; - for(i = 0; i < sizeof(OfficeArtBlip_config); i++) { - const struct OfficeArtBlip_config *c = &OfficeArtBlip_config[i]; - if(c->recType == h->h.recType) - return c; - } - return NULL; -} - -static const char *header_ext(struct header *h) { - const struct OfficeArtBlip_config *c = header_config(h); - return c ? c->ext : NULL; -} - -static const char *header_path(struct header *h, char *buf, size_t len, int slash) { - const char *ext = header_ext(h); - if(ext) - snprintf(buf, len, "%s%d.%s", slash ? "/" : "", h->i, ext); - return ext; -} - -static int odrawfs_getattr(const char *path, struct stat *stbuf) { - int res = 0; - memset(stbuf, 0, sizeof(struct stat)); - if(strcmp(path, "/") == 0) { - stbuf->st_mode = S_IFDIR | 0755; - stbuf->st_nlink = 2; - } - else { - struct header *h; - for(h = headers; h; h = h->next) { - char name[PATH_MAX]; - if(!header_path(h, name, PATH_MAX, 1)) - continue; - if(!strcmp(path, name)) { - stbuf->st_mode = S_IFREG | 0444; - stbuf->st_nlink = 1; - stbuf->st_size = h->h.recLen; - } - } - } - return res; -} - -static int odrawfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi) { - (void) offset; - (void) fi; - if(strcmp(path, "/") == 0) { - filler(buf, ".", NULL, 0); - filler(buf, "..", NULL, 0); - struct header *h; - for(h = headers; h; h = h->next) { - char name[PATH_MAX]; - if(!header_path(h, name, PATH_MAX, 0)) - continue; - filler(buf, name, NULL, 0); - } - } - return 0; -} - -static int odrawfs_open(const char *path, struct fuse_file_info *fi) { - struct header *h; - for(h = headers; h; h = h->next) { - char name[PATH_MAX]; - 
if(!header_path(h, name, PATH_MAX, 1)) - continue; - if(strcmp(path, name)) - continue; - if((fi->flags & 3) != O_RDONLY) - return -EACCES; - return 0; - } - return -EACCES; -} - -static size_t read_blib_header(FILE *s, struct header *h) { - size_t n = 0; - const struct OfficeArtBlip_config *c = header_config(h); - if(c) { - byte guid[16]; - n += read_guid(s, guid); - if(member(h->h.recInstance, c->guid2)) - n += read_guid(s, guid); - if(c->metafileHeader) { - struct OfficeArtMetafileHeader h2; - n += read_OfficeArtMetafileHeader(s, &h2); - } - else { - byte b; - n += fread(&b, 1, 1, s); - } - } - return n; -} - -static int read_blib(char *filename, struct header *h, - char *buf, size_t size, off_t offset) { - int n = -EACCES; - FILE *f = fopen(filename, "r"); - if(f) { - fseek(f, h->fpos, SEEK_SET); - size_t hlen = read_blib_header(f, h); // TODO expose header as txt file? - struct peep_stream s = {f, ftell(f), h->h.recLen - hlen, 0}; - peep_stream_seek(&s, offset); - n = peep_stream_read(&s, buf, size); - fclose(f); - } - return n; -} - -static int odrawfs_read(const char *path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) { - int n = -EACCES; - struct header *h; - for(h = headers; h; h = h->next) { - char name[PATH_MAX]; - if(!header_path(h, name, PATH_MAX, 1)) - continue; - if(strcmp(path, name)) - continue; - if((fi->flags & 3) != O_RDONLY) - break; - if('/' == filename[0]) - n = read_blib(filename, h, buf, size, offset); - else { - char x[PATH_MAX]; - snprintf(x, PATH_MAX, "%s/%s", cwd, filename); - n = read_blib(x, h, buf, size, offset); - } - break; - } - return n; -} - -static struct fuse_operations odrawfs_operations = { - .readdir = odrawfs_readdir, - .getattr = odrawfs_getattr, - .open = odrawfs_open, - .read = odrawfs_read, -}; - -static struct header *read_headers(FILE *stream) { - struct header *head = NULL, *tail = NULL; - int i; - for(i = 0;; i++) { - struct header *new = malloc(sizeof(struct header)); - 
if(read_RecordHeader(stream, &new->h) <= 0) { - free(new); - break; - } - new->i = i; - new->fpos = ftell(stream); - new->next = NULL; - /* printf("%d 0x%x 0x%x 0x%x %u %lu\n", new->i, new->h.recVer, */ - /* new->h.recInstance, new->h.recType, new->h.recLen, new->fpos); */ - fseek(stream, new->h.recLen, SEEK_CUR); - if(head) { - tail->next = new; - tail = new; - } - else - head = tail = new; - } - return head; -} - -int main(int argc, char *argv[]) { - if(argc < 3) { - fprintf(stderr, "Usage: %s filename directory\n", argv[0]); - exit(-1); - } - filename = argv[1]; - argv++; - argc--; - getcwd(cwd, PATH_MAX); - FILE *stream = fopen(filename, "r"); - if(!stream) { - fprintf(stderr, "Unable to open '%s'.\n", filename); - exit(-1); - } - headers = read_headers(stream); - fclose(stream); - return fuse_main(argc, argv, &odrawfs_operations); -} diff --git a/ppt.c b/ppt.c @@ -0,0 +1,455 @@ +// TODO proper little endian read/write + +const char *VERSION = +#include "VERSION" + ; + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <stdint.h> +#include <assert.h> +#include <limits.h> + +enum cmd {CMD_LS, CMD_CAT, CMD_TXT, CMD_HTML, CMD_EXTRACT}; + +typedef uint8_t byte; +typedef uint32_t dword; + +// MS-PPT PowerPoint (.ppt) Binary File Format + +struct RecordHeader { + ushort recVer: 4; //(logand #x0f %dummy1)) + ushort recInstance: 12; //(logior (ash %dummy2 4) (ash %dummy1 -4))) + ushort recType; + dword recLen; +} __attribute__((__packed__)); + +struct in { + FILE *stream; + long size; + char *buf; + long max; + long pos; +}; + +static long in_tell(struct in *in) { + if(in->buf) { + return in->pos; + } + long z = ftell(in->stream); + if(z < 0) { + fprintf(stderr, "stream not seekable\n"); + exit(1); + } + return z; +} + +#define MB (1024 * 1024) + +static void in_open(struct in *in, char *filename) { + in->stream = filename ? 
fopen(filename, "r") : stdin; + if(!in->stream) { + fprintf(stderr, "Unable to open '%s'.\n", filename); + exit(1); + } + in->size = 0; + in->buf = NULL; + in->max = 0; + in->pos = 0; + if(ftell(in->stream) < 0) { /* not seekable */ + in->size = MB; + in->buf = malloc(in->size); + in->max = 0; + while(!feof(in->stream)) { + if(in->size <= in->max) { + in->size += MB; + in->buf = realloc(in->buf, in->size); + } + size_t n = fread(&in->buf[in->max], 1, in->size - in->max, in->stream); + if(n < 0) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + in->max += n; + } + } +} + +static void in_close(struct in *in) { + free(in->buf); + fclose(in->stream); +} + +static size_t in_read(void *ptr, size_t size, size_t nmemb, struct in *in) { + if(in->buf) { + size_t n1 = size * nmemb; + size_t n2 = n1 <= in->max - in->pos ? n1 : in->max - in->pos; + memcpy(ptr, &in->buf[in->pos], n2); + in->pos += n2; + if(n2 % size) { + fprintf(stderr, "partial data read\n"); + exit(1); + } + return n2 / size; + } + return fread(ptr, size, nmemb, in->stream); +} + +static void in_seek(struct in *in, long offset) { + int z = 1; + if(in->buf) { + if(offset <= in->max) { + in->pos = offset; + return; + } + } else { + z = fseek(in->stream, offset, SEEK_SET); + } + if(z) { + fprintf(stderr, "seek to %ld failed\n", offset); + exit(1); + } +} + +static size_t read_RecordHeader(struct in *in, struct RecordHeader *x) { + return in_read(x, sizeof(struct RecordHeader), 1, in); +} + +static size_t write_RecordHeader(struct RecordHeader *x) { + return fwrite(x, sizeof(struct RecordHeader), 1, stdout); +} + +static void cat(struct in *in, FILE *out, dword n) { + for(dword i = 0; i < n;) { + char buf[4096]; + dword m = n - i; + size_t n1 = in_read(buf, 1, m <= 4096 ? 
m : 4096, in); + if(n1 <= 0) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + size_t n2 = fwrite(buf, 1, n1, out); + if(n1 != n2) { + fprintf(stderr, "output failed\n"); + exit(1); + } + i += n1; + } +} + +static void txt(struct RecordHeader *h, struct in *in) { + switch(h->recType) { + case 0x0fa0: // RT_TextCharsAtom utf16le + case 0x0fba: // RT_CString + for(int i = 0; i < h->recLen; i += 2) { + short c; + if(1 != in_read(&c, 2, 1, in)) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + if(0 < c && c < 0x80) { + switch(c) { + case 0x0d: puts(""); break; + default: putchar(c); // TODO whole utf + } + } + } + puts(""); + break; + case 0x0fa8: // RT_TextBytesAtom ascii + for(int i = 0; i < h->recLen; i++) { + char c; + if(1 != in_read(&c, 1, 1, in)) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + switch(c) { + case 0x0d: puts(""); break; + default: putchar(c); // TODO whole utf + } + } + puts(""); + break; + case 0x03ee: // RT_Slide + case 0x03e8: // RT_Document + { + static int slide = 0; + if(0 < slide) puts(" "); + slide++; + } + } +} + +static void html(struct RecordHeader *h, struct in *in) { + switch(h->recType) { + case 0x0fa0: // RT_TextCharsAtom utf16le + case 0x0fba: // RT_CString + printf("<p>"); + for(int i = 0; i < h->recLen; i += 2) { + short c; + if(1 != in_read(&c, 2, 1, in)) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + if(0 < c && c < 0x80) { + switch(c) { + case 0x0d: puts("<br>"); break; + default: putchar(c); // TODO whole utf + } + } + } + puts("</p>"); + break; + case 0x0fa8: // RT_TextBytesAtom ascii + printf("<p>"); + for(int i = 0; i < h->recLen; i++) { + char c; + if(1 != in_read(&c, 1, 1, in)) { + fprintf(stderr, "unexpected end of file\n"); + exit(1); + } + switch(c) { + case 0x0d: puts("<br>"); break; + default: putchar(c); // TODO whole utf + } + } + puts("</p>"); + break; + case 0x03ee: // RT_Slide + case 0x03e8: // RT_Document + { + static int slide = 0; + if(0 < 
slide) puts("<hr/>"); + slide++; + printf("<h1>Slide %d</h1>\n", slide); + } + } +} + +// MS-ODRAW Office Drawing Binary File Format + +struct POINT { + dword x; + dword y; +} __attribute__((__packed__)); + +struct RECT { + dword left; + dword top; + dword right; + dword bottom; +} __attribute__((__packed__)); + +struct OfficeArtMetafileHeader { + dword cbSize; + struct RECT rcBounds; + struct POINT ptSize; + dword cbSave; + byte compression; // :member '(#x00 #xfe)) + byte filter; //:always #xfe)) +} __attribute__((__packed__)); + +static size_t read_OfficeArtMetafileHeader(struct in *in, + struct OfficeArtMetafileHeader *x) { + return in_read(x, 1, sizeof(struct OfficeArtMetafileHeader), in); +} + +static size_t read_guid(struct in *in, byte guid[]) { + return in_read(guid, 1, 16, in); +} + +static const struct OfficeArtBlip_config { + ushort recType; + ushort recInstance[4]; + char *ext; + ushort guid2[2]; + int metafileHeader; +} OfficeArtBlip_config[] = { + {0xf01a, {0x3d4, 0x3d5, 0, 0}, "emf", {0x3d5, 0}, 1}, + {0xf01b, {0x216, 0x217, 0, 0}, "wmf", {0x217, 0}, 1}, + {0xf01c, {0x542, 0x543, 0, 0}, "pict", {0x543, 0}, 1}, + {0xf01d, {0x46a, 0x46b, 0x6e2, 0x6e3}, "jpeg", {0x46b, 0x6e3}, 0}, + {0xf01e, {0x6e0, 0x6e1, 0, 0}, "png", {0x6e1, 0}, 0}, + {0xf01f, {0x7a8, 0x7a9, 0, 0}, "dib", {0x7a9, 0}, 0}, + {0xf029, {0x6e4, 0x6e5, 0, 0}, "tiff", {0x6e5, 0}, 0}, + {0xf02a, {0x46a, 0x46b, 0x6e2, 0x6e3}, "jpeg", {0x46b, 0x6e3}, 0}, + {0} +}; + +static void extract(struct RecordHeader *h, struct in *in) { + static int img = 0; + for(int i = 0; OfficeArtBlip_config[i].recType; i++) { + const struct OfficeArtBlip_config *c = &OfficeArtBlip_config[i]; + if(h->recType == c->recType) { + char filename[PATH_MAX]; + snprintf(filename, PATH_MAX, "%d.%s", img++, c->ext); + size_t n = 0; + byte guid[16]; + n += read_guid(in, guid); + if(h->recInstance == c->guid2[0] + || (c->guid2[1] && h->recInstance == c->guid2[1])) + n += read_guid(in, guid); + if(c->metafileHeader) { + struct 
OfficeArtMetafileHeader h2; + n += read_OfficeArtMetafileHeader(in, &h2); + } else { + byte b; + n += in_read(&b, 1, 1, in); + } + FILE *f = fopen(filename, "w"); + cat(in, f, h->recLen - n); + fclose(f); + } + } +} + +static void out(struct RecordHeader *h, struct in *in, int level, int i, int xlevel, int xi, enum cmd c) { + switch(c) { + case CMD_LS: + printf("%4d %4d 0x%04x 0x%04x 0x%04x %10u\n", + level, i, h->recVer, h->recInstance, h->recType, h->recLen); + break; + case CMD_CAT: + if(level == xlevel && i == xi) { + if(1 != write_RecordHeader(h)) { + fprintf(stderr, "output failed\n"); + exit(1); + } + cat(in, stdout, h->recLen); + exit(0); + } + case CMD_TXT: txt(h, in); break; + case CMD_HTML: html(h, in); break; + case CMD_EXTRACT: extract(h, in); break; + } +} + +static void walk(struct in *in, int level, dword pos, int xlevel, int xi, enum cmd cmd) { + assert(0 <= level); + assert(0 <= pos); + for(int i = 0;; i++) { + if(0 < pos && pos <= in_tell(in)) + break; + struct RecordHeader h; + size_t n = read_RecordHeader(in, &h); + if(n <= 0) { + break; // EOF + } + if(1 != n) { + fprintf(stderr, "error reading record header\n"); + exit(1); + } + size_t start = in_tell(in), end = start + h.recLen; + out(&h, in, level, i, xlevel, xi, cmd); + if(0xf == h.recVer) { + if(0 < pos) + end = end < pos ? 
end : pos; + walk(in, 1 + level, end, xlevel, xi, cmd); + } else + in_seek(in, end); + } + // TODO xlevel xi not found -> exit(1) +} + +static int cmd_ls(char *argv[]) { + char *filename = argv[0]; + struct in in; + in_open(&in, filename); + walk(&in, 0, 0, -1, -1, CMD_LS); + in_close(&in); + return 0; +} + +static int cmd_cat(char *argv[]) { + char *level = argv[0]; + if(!level) { + fprintf(stderr, "level expected\n"); + return 1; + } + int xlevel; + if(1 != sscanf(level, "%d", &xlevel)) { + fprintf(stderr, "unknown level '%s'\n", level); + return 1; + } + char *i = argv[1]; + if(!i) { + fprintf(stderr, "i expected\n"); + return 1; + } + int xi; + if(1 != sscanf(i, "%d", &xi)) { + fprintf(stderr, "unknown i '%s'\n", i); + return 1; + } + char *filename = argv[2]; + struct in in; + in_open(&in, filename); + walk(&in, 0, 0, xlevel, xi, CMD_CAT); + in_close(&in); + return 0; +} + +static int cmd_txt(char *argv[]) { + char *filename = argv[0]; + struct in in; + in_open(&in, filename); + walk(&in, 0, 0, -1, -1, CMD_TXT); + in_close(&in); + return 0; +} + +static int cmd_html(char *argv[]) { + char *filename = argv[0]; + struct in in; + in_open(&in, filename); + walk(&in, 0, 0, -1, -1, CMD_HTML); + in_close(&in); + return 0; +} + +static int cmd_extract(char *argv[]) { + char *filename = argv[0]; + struct in in; + in_open(&in, filename); + walk(&in, 0, 0, -1, -1, CMD_EXTRACT); + in_close(&in); + return 0; +} + +static int cmd_help(void) { + printf("usage:\n"); + printf(" ppt ls [filename] list records\n"); + printf(" ppt cat level index [filename] print record\n"); + printf(" ppt txt [filename] print text\n"); + printf(" ppt html [filename] print html\n"); + printf(" ppt extract [filename] extract pictures\n"); + printf(" ppt --help print help\n"); + printf(" ppt --version print version\n"); + return 0; +} + +static int cmd_version(void) { + printf("%s\n", VERSION); + return 0; +} + +int main(int argc, char *argv[]) { + char *cmd = *++argv; + if(!cmd) { + 
fprintf(stderr, "command expected\n"); + return 1; + } + ++argv; + if(!strcmp("ls", cmd)) return cmd_ls(argv); + else if(!strcmp("cat", cmd)) return cmd_cat(argv); + else if(!strcmp("txt", cmd)) return cmd_txt(argv); + else if(!strcmp("html", cmd)) return cmd_html(argv); + else if(!strcmp("extract", cmd)) return cmd_extract(argv); + else if(!strcmp("--help", cmd)) return cmd_help(); + else if(!strcmp("--version", cmd)) return cmd_version(); + else fprintf(stderr, "unexpected command %s\n", cmd); + return 1; +} diff --git a/ppt2html.c b/ppt2html.c @@ -1,105 +0,0 @@ -// TODO proper little endian read_ - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <stdint.h> - -#include "utf8.h" - -typedef uint32_t dword; - -// MS-PPT PowerPoint (.ppt) Binary File Format - -struct RecordHeader { - ushort recVer: 4; //(logand #x0f %dummy1)) - ushort recInstance: 12; //(logior (ash %dummy2 4) (ash %dummy1 -4))) - ushort recType; - dword recLen; -} __attribute__((__packed__)); - -static size_t read_RecordHeader(FILE *stream, struct RecordHeader *x) { - return fread(x, sizeof(struct RecordHeader), 1, stream); -} - -static int slide_no = 0; - -static void out(FILE *stream, int level, int i, struct RecordHeader *h) { - int j; - /* for(j = 0; j < level; j++) */ - /* printf(" "); */ - /* printf("%d 0x%x 0x%x 0x%x %u\n", i, h->recVer, h->recInstance, h->recType, h->recLen); */ - switch(h->recType) { - case 0x0fa0: // RT_TextCharsAtom utf16le - case 0x0fba: // RT_CString - printf("<p>"); - for(j = 0; j < h->recLen; j += 2) { - short c; - fread(&c, 2, 1, stream); - print_utf8(c); - } - puts("</p>"); - break; - case 0x0fa8: // RT_TextBytesAtom ascii - printf("<p>"); - for(j = 0; j < h->recLen; j++) { - char c; - fread(&c, 1, 1, stream); - if(c == 0x0d) - printf("<br/>\n"); - else - putchar(c); - } - puts("</p>"); - break; - case 0x03ee: // RT_Slide - case 0x03e8: // RT_Document - if(0 < slide_no) - puts("<hr/>\n</div>"); - printf("<div class=\"slide\">\n<h1>Slide 
%d</h1>\n", ++slide_no); - } -} - -static void dump(FILE *stream, int level, dword pos) { - int i; - for(i = 0;; i++) { - if(0 < pos && pos <= ftell(stream)) - break; - struct RecordHeader h; - if(read_RecordHeader(stream, &h) <= 0) { - break; // EOF - } - size_t start = ftell(stream); - out(stream, level, i, &h); - if(0xf == h.recVer) { - dword n = ftell(stream) + h.recLen; - if(0 < pos) - n = n < pos ? n : pos; - dump(stream, 1 + level, n); - } else - fseek(stream, start + h.recLen, SEEK_SET); - } -} - -int main(int argc, char *argv[]) { - if(argc < 2) { - fprintf(stderr, "Usage: %s filename\n", argv[0]); - exit(-1); - } - char *filename = argv[1]; - FILE *stream = fopen(filename, "r"); - if(!stream) { - fprintf(stderr, "Unable to open '%s'.\n", filename); - exit(-1); - } - puts("<html>\n<head>"); - printf("<title>%s</title>\n", filename); - puts("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>"); - puts("</head>\n<body>"); - dump(stream, 0, 0); - if(0 < slide_no) - puts("</div>"); - puts("</body>\n</html>"); - fclose(stream); - return 0; -} diff --git a/pptdump.sh b/pptdump.sh @@ -1,12 +0,0 @@ -#!/bin/sh -FILE=$(realpath $1) -DIR=$2 -FS=$(mktemp -d -q /tmp/pptdump-XXXXXX) -PIC=$(mktemp -d -q /tmp/pptdump-XXXXXX) -cfbfs "$FILE" "$FS" -odrawfs "$FS/Root Entry/Pictures" "$PIC" -ppt2html "$FS/Root Entry/PowerPoint Document" >"$DIR/index.html" -cp "$PIC"/* "$DIR" -fusermount -u "$FS" -fusermount -u "$PIC" -rmdir "$FS" "$PIC" diff --git a/pptview.sh b/pptview.sh @@ -1,4 +0,0 @@ -#!/bin/sh -DIR=$(mktemp -d -q /tmp/pptview-XXXXXX) -pptdump.sh "$1" "$DIR" -gnome-open "$DIR"/index.html diff --git a/rhdump.c b/rhdump.c @@ -1,64 +0,0 @@ -// TODO proper little endian read_ - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <stdint.h> - -typedef uint32_t dword; - -// MS-PPT PowerPoint (.ppt) Binary File Format - -struct RecordHeader { - ushort recVer: 4; //(logand #x0f %dummy1)) - ushort recInstance: 12; //(logior (ash %dummy2 
4) (ash %dummy1 -4))) - ushort recType; - dword recLen; -} __attribute__((__packed__)); - -static size_t read_RecordHeader(FILE *stream, struct RecordHeader *x) { - return fread(x, sizeof(struct RecordHeader), 1, stream); -} - -static void out(int level, int i, struct RecordHeader *h) { - int j; - for(j = 0; j < level; j++) - printf(" "); - printf("%d 0x%x 0x%x 0x%x %u\n", i, h->recVer, h->recInstance, h->recType, h->recLen); -} - -static void dump(FILE *stream, int level, dword pos) { - int i; - for(i = 0;; i++) { - if(0 < pos && pos <= ftell(stream)) - break; - struct RecordHeader h; - if(read_RecordHeader(stream, &h) <= 0) { - break; // EOF - } - out(level, i, &h); - if(0xf == h.recVer) { - dword n = ftell(stream) + h.recLen; - if(0 < pos) - n = n < pos ? n : pos; - dump(stream, 1 + level, n); - } else - fseek(stream, h.recLen, SEEK_CUR); - } -} - -int main(int argc, char *argv[]) { - if(argc < 2) { - fprintf(stderr, "Usage: %s filename\n", argv[0]); - exit(-1); - } - char *filename = argv[1]; - FILE *stream = fopen(filename, "r"); - if(!stream) { - fprintf(stderr, "Unable to open '%s'.\n", filename); - exit(-1); - } - dump(stream, 0, 0); - fclose(stream); - return 0; -} diff --git a/shell.nix b/shell.nix @@ -0,0 +1 @@ +(import <nixpkgs> {}).callPackage ./. {} diff --git a/utf8.h b/utf8.h @@ -1,3 +0,0 @@ -void OutputCharCorrected(unsigned char c); -void print_utf8(unsigned short c); -void put_utf8(unsigned short c);