commit eb117a980e2fa75c0495dfcf193aaee474a6f48f
parent 8925af9af2a2f65141fa80022bda3ffd1136f8a0
Author: Tomas Hlavaty <tom@logand.com>
Date: Mon, 30 May 2011 20:43:47 +0200
rhdump and ppt2html added
Diffstat:
M | Makefile | | | 13 | ++++++++++++- |
A | ppt2html.c | | | 105 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | rhdump.c | | | 64 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | utf8.c | | | 151 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | utf8.h | | | 3 | +++ |
5 files changed, 335 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-ALL=cfbfs odrawfs
+ALL=cfbfs odrawfs rhdump ppt2html
#-std=c99
CFLAGS=-g -Wall
#CFLAGS=-Wall -O2
@@ -16,5 +16,16 @@ odrawfs: odrawfs.c
$(CC) $(CFLAGS) $(CFLAGSFUSE) -o $@ $< $(LDFLAGS) $(LDFLAGSFUSE)
# strip $@
+rhdump: rhdump.c
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+# strip $@
+
+utf8.o: utf8.c utf8.h
+ $(CC) $(CFLAGS) -o $@ -c $<
+
+ppt2html: ppt2html.c utf8.o
+ $(CC) $(CFLAGS) -o $@ $< utf8.o $(LDFLAGS)
+# strip $@
+
clean:
rm -f $(ALL)
diff --git a/ppt2html.c b/ppt2html.c
@@ -0,0 +1,105 @@
+// TODO proper little endian read_
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "utf8.h"
+
+typedef uint32_t dword;
+
+// MS-PPT PowerPoint (.ppt) Binary File Format
+
+/*
+ * Header that precedes every record in a PowerPoint binary stream.
+ *
+ * The first 16-bit word carries recVer in its low 4 bits and recInstance in
+ * its upper 12 bits; it is followed by a 16-bit record type and a 32-bit
+ * payload length. The struct is packed so that no padding is inserted.
+ *
+ * Fixed-width <stdint.h> types are used instead of `ushort`, which is not a
+ * standard C type and is only visible when the C library exposes its
+ * BSD/SysV extension names (it disappears under strict -std=c99).
+ */
+struct RecordHeader {
+  uint16_t recVer: 4;       // low nibble of the first word; 0xf is treated as a container by dump()
+  uint16_t recInstance: 12; // upper 12 bits of the first word
+  uint16_t recType;         // record type, switched on in out()
+  dword recLen;             // number of payload bytes following the header
+} __attribute__((__packed__));
+
+/*
+ * Read one record header from `stream` into `*x`.
+ *
+ * The 8 header bytes are decoded explicitly as little-endian fields instead
+ * of being fread() straight into the struct, so the result does not depend
+ * on the host byte order or on how the compiler lays out bit-fields.
+ *
+ * Returns 1 when a complete header was read, 0 on EOF, short read or I/O
+ * error (in which case `*x` is left untouched).
+ */
+static size_t read_RecordHeader(FILE *stream, struct RecordHeader *x) {
+  unsigned char raw[8];
+  if(fread(raw, sizeof raw, 1, stream) != 1)
+    return 0;
+  // First word: version in the low nibble, instance in the remaining 12 bits.
+  unsigned verInst = raw[0] | ((unsigned)raw[1] << 8);
+  x->recVer = verInst & 0x0f;
+  x->recInstance = verInst >> 4;
+  x->recType = (uint16_t)(raw[2] | ((unsigned)raw[3] << 8));
+  x->recLen = (dword)raw[4] | ((dword)raw[5] << 8)
+    | ((dword)raw[6] << 16) | ((dword)raw[7] << 24);
+  return 1;
+}
+
+// Number of slide <div>s opened so far; also used to number the headings.
+static int slide_no = 0;
+
+/*
+ * Emit the HTML for one record whose header `h` was just read from `stream`.
+ *
+ * - RT_TextCharsAtom / RT_CString: payload is UTF-16LE; each code unit is
+ *   written through print_utf8() inside a <p> element.
+ * - RT_TextBytesAtom: payload is one byte per character; 0x0d becomes a line
+ *   break, every other byte goes through print_utf8() so that markup
+ *   characters are handled by the same escaping path as the UTF-16 text and
+ *   bytes >= 0x80 become valid UTF-8 instead of raw bytes.
+ * - RT_Slide / RT_Document: closes the previous slide <div> (if any) and
+ *   opens a new numbered one.
+ * - Anything else produces no output.
+ *
+ * The function may consume up to h->recLen payload bytes from `stream`; it
+ * never reads more than that and stops early if the file is truncated.
+ * `level` and `i` are accepted for symmetry with the rhdump tracer and are
+ * not used here.
+ */
+static void out(FILE *stream, int level, int i, struct RecordHeader *h) {
+  (void)level;
+  (void)i;
+  dword j; // same width as recLen, so the comparison cannot go signed/overflow
+  switch(h->recType) {
+  case 0x0fa0: // RT_TextCharsAtom utf16le
+  case 0x0fba: // RT_CString
+    printf("<p>");
+    // `j + 1 < recLen` ignores a dangling odd byte instead of reading past the record.
+    for(j = 0; j + 1 < h->recLen; j += 2) {
+      unsigned char raw[2];
+      if(fread(raw, sizeof raw, 1, stream) != 1)
+        break; // truncated file
+      print_utf8((unsigned short)(raw[0] | (raw[1] << 8)));
+    }
+    puts("</p>");
+    break;
+  case 0x0fa8: // RT_TextBytesAtom ascii
+    printf("<p>");
+    for(j = 0; j < h->recLen; j++) {
+      int c = fgetc(stream);
+      if(c == EOF)
+        break; // truncated file
+      if(c == 0x0d)
+        printf("<br/>\n");
+      else
+        print_utf8((unsigned short)c);
+    }
+    puts("</p>");
+    break;
+  case 0x03ee: // RT_Slide
+  case 0x03e8: // RT_Document
+    if(0 < slide_no)
+      puts("<hr/>\n</div>");
+    printf("<div class=\"slide\">\n<h1>Slide %d</h1>\n", ++slide_no);
+    break;
+  default:
+    break;
+  }
+}
+
+/*
+ * Walk the records of `stream` starting at the current position and render
+ * each one with out().
+ *
+ * level - nesting depth of the records being walked (0 at top level).
+ * pos   - absolute file offset at which the enclosing container ends;
+ *         0 means "no limit, read until EOF".
+ *
+ * Records whose recVer is 0xf are treated as containers and walked
+ * recursively; all other records are skipped by seeking to the end of their
+ * payload. The walk stops on EOF, on a short header, or when ftell()/fseek()
+ * fails, so a damaged file cannot make the loop re-parse payload bytes as
+ * headers.
+ */
+static void dump(FILE *stream, int level, dword pos) {
+  int i;
+  for(i = 0;; i++) {
+    long here = ftell(stream);
+    if(here < 0)
+      break;
+    if(0 < pos && pos <= (unsigned long)here)
+      break;
+    struct RecordHeader h;
+    if(read_RecordHeader(stream, &h) != 1) {
+      break; // EOF
+    }
+    long start = ftell(stream); // first payload byte
+    if(start < 0)
+      break;
+    out(stream, level, i, &h);
+    if(0xf == h.recVer) {
+      // Container payload spans [start, start + recLen). Computed in 64 bits
+      // and saturated so it can never wrap around to 0, which would mean
+      // "no limit" to the recursive call.
+      uint64_t end = (uint64_t)start + h.recLen;
+      if(end > UINT32_MAX)
+        end = UINT32_MAX;
+      if(0 < pos && pos < end)
+        end = pos; // a child may not extend past its parent
+      dump(stream, 1 + level, (dword)end);
+    } else {
+      // out() may have consumed part of the payload, so seek absolutely.
+      long target = (long)((unsigned long)start + h.recLen);
+      if(target < start || fseek(stream, target, SEEK_SET) != 0)
+        break;
+    }
+  }
+}
+
+/*
+ * Write `s` to stdout with the HTML-significant characters <, > and &
+ * replaced by character references; all other bytes are copied unchanged.
+ */
+static void put_escaped(const char *s) {
+  for(; *s; s++) {
+    switch(*s) {
+    case '<': fputs("&lt;", stdout); break;
+    case '>': fputs("&gt;", stdout); break;
+    case '&': fputs("&amp;", stdout); break;
+    default: putchar(*s); break;
+    }
+  }
+}
+
+/*
+ * Usage: ppt2html filename
+ *
+ * Reads the record stream in `filename` and writes an HTML rendering of its
+ * text records to stdout. Exits with a non-zero status when no filename is
+ * given or the file cannot be opened.
+ */
+int main(int argc, char *argv[]) {
+  if(argc < 2) {
+    fprintf(stderr, "Usage: %s filename\n", argv[0]);
+    exit(-1);
+  }
+  char *filename = argv[1];
+  // Binary mode: the input is a binary record stream, and text mode would
+  // translate line endings on platforms that distinguish the two.
+  FILE *stream = fopen(filename, "rb");
+  if(!stream) {
+    fprintf(stderr, "Unable to open '%s'.\n", filename);
+    exit(-1);
+  }
+  puts("<html>\n<head>");
+  // The filename is caller-supplied, so escape it before embedding it in markup.
+  fputs("<title>", stdout);
+  put_escaped(filename);
+  puts("</title>");
+  puts("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>");
+  puts("</head>\n<body>");
+  dump(stream, 0, 0);
+  if(0 < slide_no)
+    puts("</div>"); // close the last slide opened by out()
+  puts("</body>\n</html>");
+  fclose(stream);
+  return 0;
+}
diff --git a/rhdump.c b/rhdump.c
@@ -0,0 +1,64 @@
+// TODO proper little endian read_
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+typedef uint32_t dword;
+
+// MS-PPT PowerPoint (.ppt) Binary File Format
+
+/*
+ * Header that precedes every record in a PowerPoint binary stream.
+ *
+ * The first 16-bit word carries recVer in its low 4 bits and recInstance in
+ * its upper 12 bits; it is followed by a 16-bit record type and a 32-bit
+ * payload length. The struct is packed so that no padding is inserted.
+ *
+ * Fixed-width <stdint.h> types are used instead of `ushort`, which is not a
+ * standard C type and is only visible when the C library exposes its
+ * BSD/SysV extension names (it disappears under strict -std=c99).
+ */
+struct RecordHeader {
+  uint16_t recVer: 4;       // low nibble of the first word; 0xf is treated as a container by dump()
+  uint16_t recInstance: 12; // upper 12 bits of the first word
+  uint16_t recType;         // record type, printed by out()
+  dword recLen;             // number of payload bytes following the header
+} __attribute__((__packed__));
+
+/*
+ * Read one record header from `stream` into `*x`.
+ *
+ * The 8 header bytes are decoded explicitly as little-endian fields instead
+ * of being fread() straight into the struct, so the result does not depend
+ * on the host byte order or on how the compiler lays out bit-fields.
+ *
+ * Returns 1 when a complete header was read, 0 on EOF, short read or I/O
+ * error (in which case `*x` is left untouched).
+ */
+static size_t read_RecordHeader(FILE *stream, struct RecordHeader *x) {
+  unsigned char raw[8];
+  if(fread(raw, sizeof raw, 1, stream) != 1)
+    return 0;
+  // First word: version in the low nibble, instance in the remaining 12 bits.
+  unsigned verInst = raw[0] | ((unsigned)raw[1] << 8);
+  x->recVer = verInst & 0x0f;
+  x->recInstance = verInst >> 4;
+  x->recType = (uint16_t)(raw[2] | ((unsigned)raw[3] << 8));
+  x->recLen = (dword)raw[4] | ((dword)raw[5] << 8)
+    | ((dword)raw[6] << 16) | ((dword)raw[7] << 24);
+  return 1;
+}
+
+/*
+ * Print one trace line for record header `h`: one space per nesting `level`,
+ * then the record's index `i` within its parent, recVer, recInstance and
+ * recType in hex, and recLen in decimal.
+ *
+ * The header fields are cast to the exact types the printf conversions
+ * expect; bit-fields and uint16_t promote to int, and uint32_t is not
+ * guaranteed to be `unsigned int`.
+ */
+static void out(int level, int i, struct RecordHeader *h) {
+  int j;
+  for(j = 0; j < level; j++)
+    printf(" ");
+  printf("%d 0x%x 0x%x 0x%x %lu\n", i,
+         (unsigned)h->recVer, (unsigned)h->recInstance,
+         (unsigned)h->recType, (unsigned long)h->recLen);
+}
+
+/*
+ * Walk the records of `stream` starting at the current position and print
+ * each header with out().
+ *
+ * level - nesting depth of the records being walked (0 at top level).
+ * pos   - absolute file offset at which the enclosing container ends;
+ *         0 means "no limit, read until EOF".
+ *
+ * Records whose recVer is 0xf are treated as containers and walked
+ * recursively; all other records are skipped by seeking over their payload.
+ * The walk stops on EOF, on a short header, or when ftell()/fseek() fails,
+ * so a damaged file cannot make the loop re-parse payload bytes as headers.
+ */
+static void dump(FILE *stream, int level, dword pos) {
+  int i;
+  for(i = 0;; i++) {
+    long here = ftell(stream);
+    if(here < 0)
+      break;
+    if(0 < pos && pos <= (unsigned long)here)
+      break;
+    struct RecordHeader h;
+    if(read_RecordHeader(stream, &h) != 1) {
+      break; // EOF
+    }
+    out(level, i, &h);
+    if(0xf == h.recVer) {
+      long start = ftell(stream); // first payload byte
+      if(start < 0)
+        break;
+      // Container payload spans [start, start + recLen). Computed in 64 bits
+      // and saturated so it can never wrap around to 0, which would mean
+      // "no limit" to the recursive call.
+      uint64_t end = (uint64_t)start + h.recLen;
+      if(end > UINT32_MAX)
+        end = UINT32_MAX;
+      if(0 < pos && pos < end)
+        end = pos; // a child may not extend past its parent
+      dump(stream, 1 + level, (dword)end);
+    } else {
+      // A length that does not fit in long would turn into a backward seek.
+      long skip = (long)h.recLen;
+      if(skip < 0 || fseek(stream, skip, SEEK_CUR) != 0)
+        break;
+    }
+  }
+}
+
+/*
+ * Usage: rhdump filename
+ *
+ * Prints the tree of record headers found in `filename` to stdout. Exits
+ * with a non-zero status when no filename is given or the file cannot be
+ * opened.
+ */
+int main(int argc, char *argv[]) {
+  if(argc < 2) {
+    fprintf(stderr, "Usage: %s filename\n", argv[0]);
+    exit(-1);
+  }
+  char *filename = argv[1];
+  // Binary mode: the input is a binary record stream, and text mode would
+  // translate line endings on platforms that distinguish the two.
+  FILE *stream = fopen(filename, "rb");
+  if(!stream) {
+    fprintf(stderr, "Unable to open '%s'.\n", filename);
+    exit(-1);
+  }
+  dump(stream, 0, 0);
+  fclose(stream);
+  return 0;
+}
diff --git a/utf8.c b/utf8.c
@@ -0,0 +1,151 @@
+/*
+ pptHtml - Format a PowerPoint Presentation into Html
+ Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "utf8.h"
+
+#include "stdio.h"
+
+/*
+ * Write the single-byte character `c` to stdout in a form that is safe to
+ * embed in HTML.
+ *
+ * - '\r' becomes a line break.
+ * - The markup characters <, >, & and " are written as character
+ *   references so they cannot be mistaken for tags or entities.
+ * - Values in 0x80..0x9F that have a case below are written as the
+ *   typographic symbol listed in that case's comment.
+ * - Every other value is written unchanged with putchar().
+ */
+void OutputCharCorrected(unsigned char c)
+{
+	switch (c)
+	{	/* Special char handlers here... */
+		case '\r':
+			printf("<BR>\n");
+			break;
+		case 0x3C:	/* less-than */
+			printf("&lt;");
+			break;
+		case 0x3E:	/* greater-than */
+			printf("&gt;");
+			break;
+		case 0x26:	/* ampersand */
+			printf("&amp;");
+			break;
+		case 0x22:	/* double quote */
+			printf("&quot;");
+			break;
+		/* Also need to cover 128-159 since MS uses this area... */
+		case 0x80:	/* Euro Symbol */
+			printf("€");
+			break;
+		case 0x82:	/* baseline single quote */
+			printf("‚");
+			break;
+		case 0x83:	/* florin */
+			printf("ƒ");
+			break;
+		case 0x84:	/* baseline double quote */
+			printf("„");
+			break;
+		case 0x85:	/* ellipsis */
+			printf("…");
+			break;
+		case 0x86:	/* dagger */
+			printf("†");
+			break;
+		case 0x87:	/* double dagger */
+			printf("‡");
+			break;
+		case 0x88:	/* circumflex accent */
+			printf("ˆ");
+			break;
+		case 0x89:	/* permile */
+			printf("‰");
+			break;
+		case 0x8A:	/* S Hacek */
+			printf("Š");
+			break;
+		case 0x8B:	/* left single guillemet */
+			printf("‹");
+			break;
+		case 0x8C:	/* OE ligature */
+			printf("Œ");
+			break;
+		case 0x8E:	/* #LATIN CAPITAL LETTER Z WITH CARON */
+			printf("Ž");
+			break;
+		case 0x91:	/* left single quote ? */
+			printf("‘");
+			break;
+		case 0x92:	/* right single quote ? */
+			printf("’");
+			break;
+		case 0x93:	/* left double quote */
+			printf("“");
+			break;
+		case 0x94:	/* right double quote */
+			printf("”");
+			break;
+		case 0x95:	/* bullet */
+			printf("•");
+			break;
+		case 0x96:	/* endash */
+			printf("–");
+			break;
+		case 0x97:	/* emdash */
+			printf("—");
+			break;
+		case 0x98:	/* tilde accent */
+			printf("˜");
+			break;
+		case 0x99:	/* trademark ligature */
+			printf("™");
+			break;
+		case 0x9A:	/* s Haceks Hacek */
+			printf("š");
+			break;
+		case 0x9B:	/* right single guillemet */
+			printf("›");
+			break;
+		case 0x9C:	/* oe ligature */
+			printf("œ");
+			break;
+		case 0x9F:	/* Y Dieresis */
+			printf("Ÿ");
+			break;
+		default:
+			putchar(c);
+			break;
+	}
+}
+
+/*
+ * Write the 16-bit code unit `c` to stdout as UTF-8.
+ *
+ * A zero unit produces no output. Units below 0x80 are delegated to
+ * OutputCharCorrected(); units below 0x800 are written as a two-byte
+ * sequence and everything else as a three-byte sequence, with put_utf8()
+ * supplying each continuation byte.
+ */
+void print_utf8(unsigned short c)
+{
+	const unsigned code = c;
+
+	if (code == 0)
+		return;
+
+	if (code < 0x80)
+	{
+		OutputCharCorrected(c);
+		return;
+	}
+
+	if (code >= 0x800)
+	{
+		/* lead byte of a three-byte sequence, then the middle six bits */
+		putchar(0xE0 | (code >> 12));
+		put_utf8(c >> 6);
+	}
+	else
+	{
+		/* lead byte of a two-byte sequence */
+		putchar(0xC0 | (code >> 6));
+	}
+
+	/* both multi-byte forms end with the low six bits */
+	put_utf8(c);
+}
+
+/*
+ * Write one UTF-8 continuation byte to stdout: the low six bits of `c`
+ * with the top bit set.
+ */
+void put_utf8(unsigned short c)
+{
+	const int low6 = c & 0x3F;
+
+	putchar(0x80 | low6);
+}
diff --git a/utf8.h b/utf8.h
@@ -0,0 +1,3 @@
+void OutputCharCorrected(unsigned char c);
+void print_utf8(unsigned short c);
+void put_utf8(unsigned short c);