cl-olefs

OLE File System tools for Common Lisp
git clone https://logand.com/git/cl-olefs.git/
Log | Files | Refs

olefs.lisp (64946B)


      1 ;;; Copyright (C) 2011, 2012, 2013, 2014 Tomas Hlavaty <tom@logand.com>
      2 ;;;
      3 ;;; Permission is hereby granted, free of charge, to any person
      4 ;;; obtaining a copy of this software and associated documentation
      5 ;;; files (the "Software"), to deal in the Software without
      6 ;;; restriction, including without limitation the rights to use, copy,
      7 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies
      8 ;;; of the Software, and to permit persons to whom the Software is
      9 ;;; furnished to do so, subject to the following conditions:
     10 ;;;
     11 ;;; The above copyright notice and this permission notice shall be
     12 ;;; included in all copies or substantial portions of the Software.
     13 ;;;
     14 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     15 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     16 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     17 ;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
     18 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
     19 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21 ;;; DEALINGS IN THE SOFTWARE.
     22 
     23 (in-package :olefs)
     24 
     25 (defun double-float-from-bits (high low)
     26   (declare (optimize (speed 3) (debug 0))
     27            (type (unsigned-byte 32) high low))
     28   #+ccl
     29   (ccl::double-float-from-bits high low)
     30   #+sbcl
     31   (sb-kernel:make-double-float (sb-c::mask-signed-field 32 high) low)
     32   #-(or ccl sbcl)
     33   (let ((bignum 0))
     34     (declare (type (unsigned-byte 64) bignum))
     35     (setf (ldb (byte 32  0) bignum) low
     36           (ldb (byte 32 32) bignum) high)
     37     (ieee-floats:decode-float64 bignum)))
     38 
     39 (defmacro with-stream ((var stream) &body body)
     40   `(let ((,var ,stream))
     41      (unwind-protect (progn ,@body)
     42        (funcall ,var 'close))))
     43 
     44 (defun stream-position (stream &optional newpos)
     45   (if (functionp stream)
     46       (funcall stream 'stream-position newpos)
     47       (if newpos
     48           (file-position stream newpos)
     49           (file-position stream))))
     50 
     51 (defun physical-stream-position (stream)
     52   (if (functionp stream)
     53       (funcall stream 'physical-stream-position)
     54       (file-position stream)))
     55 
     56 (defun read-octet (stream)
     57   (if (functionp stream)
     58       (funcall stream 'read-octet)
     59       (read-byte stream)))
     60 
     61 (defun copy-stream (in out)
     62   (handler-case (loop (write-byte (read-octet in) out))
     63     (end-of-file ())))
     64 
     65 (defun copy-file (in out)
     66   (with-open-file (i in :element-type '(unsigned-byte 8))
     67     (with-open-file (o out
     68                        :element-type '(unsigned-byte 8)
     69                        :direction :output
     70                        :if-exists :error
     71                        :if-does-not-exist :create)
     72       (loop
     73          with buf = (make-array 4096 :element-type '(unsigned-byte 8))
     74          with n = nil
     75          while (plusp (setq n (read-sequence buf i)))
     76          do (write-sequence buf o :end n)))))
     77 
     78 (defun shorter-stream (stream size)
     79   (let ((offset 0)
     80         self)
     81     (setq self
     82           (lambda (msg)
     83             (assert stream)
     84             (ecase msg
     85               (close (setq stream nil))
     86               (stream-position offset)
     87               (physical-stream-position (physical-stream-position stream))
     88               (read-octet
     89                (unless (< offset size)
     90                  (error 'end-of-file :stream self))
     91                (incf offset)
     92                (read-octet stream)))))))
     93 
     94 (defun vector-stream (vector physical-stream-position)
     95   (let ((offset 0)
     96         (size (length vector))
     97         self)
     98     (setq self
     99           (lambda (msg)
    100             (assert vector)
    101             (ecase msg
    102               (close (setq vector nil))
    103               (stream-position offset)
    104               (physical-stream-position (+ offset physical-stream-position))
    105               (read-octet
    106                (unless (< offset size)
    107                  (error 'end-of-file :stream self))
    108                (prog1 (aref vector offset)
    109                  (incf offset))))))))
    110 
    111 ;;; MS-CFB Compound File Binary File Format
    112 
    113 (defconstant +unused-sector+ 0)
    114 (defconstant +maxregsect+ #xfffffffa)
    115 (defconstant +difsect+ #xfffffffc)
    116 (defconstant +fatsect+ #xfffffffd)
    117 (defconstant +endofchain+ #xfffffffe)
    118 (defconstant +freesect+ #xffffffff)
    119 
    120 (defconstant +maxregsig+ #xfffffffa)
    121 (defconstant +nostream+ #xffffffff)
    122 
    123 #+nil
    124 (defconstant clsid-null (make-array 16
    125                                     :element-type '(unsigned-byte 8)
    126                                     :initial-element 0))
    127 
    128 (defun read-ushort (stream)
    129   (logior (read-octet stream)
    130           (ash (read-octet stream) 8)))
    131 
    132 (defun read-dword (stream)
    133   (logior (read-octet stream)
    134           (ash (read-octet stream) 8)
    135           (ash (read-octet stream) 16)
    136           (ash (read-octet stream) 24)))
    137 
    138 (defun read-ulonglong (stream)
    139   (logior (read-octet stream)
    140           (ash (read-octet stream) 8)
    141           (ash (read-octet stream) 16)
    142           (ash (read-octet stream) 24)
    143           (ash (read-octet stream) 32)
    144           (ash (read-octet stream) 40)
    145           (ash (read-octet stream) 48)
    146           (ash (read-octet stream) 56)))
    147 
    148 (defun read-achar (stream)
    149   (read-octet stream))
    150 
    151 (defun read-wchar (stream)
    152   (read-ushort stream))
    153 
    154 (defun read-filetime (stream)
    155   (read-ulonglong stream))
    156 
    157 (defun read-octets (stream n)
    158   (let ((x (make-array n :element-type '(unsigned-byte 8) :initial-element 0)))
    159     (if (functionp stream)
    160         (let ((i 0))
    161           (handler-case (do ()
    162                             ((<= n i))
    163                           (setf (aref x i) (read-octet stream))
    164                           (incf i))
    165             (end-of-file () i)))
    166         (read-sequence x stream))
    167     x))
    168 
    169 (defun read-guid (stream)
    170   (read-octets stream 16))
    171 
    172 (defun read-vector (stream n element-type reader)
    173   (let ((x (make-array n :element-type element-type :initial-element 0)))
    174     (dotimes (i n x)
    175       (setf (aref x i) (funcall reader stream)))))
    176 
    177 (define-structure ole-header ()
    178   (signature (ubyte 8))
    179   (clsid guid)
    180   (minor-version ushort)
    181   (major-version ushort)
    182   (byte-order ushort)
    183   (sector-shift ushort)
    184   (mini-sector-shift ushort)
    185   (reserved (ubyte 6))
    186   (number-of-directory-sectors dword)
    187   (number-of-fat-sectors dword)
    188   (first-directory-sector-location dword)
    189   (transaction-signature-number dword)
    190   (mini-stream-cutoff-size dword)
    191   (first-mini-fat-sector-location dword)
    192   (number-of-mini-fat-sectors dword)
    193   (first-difat-sector-location dword)
    194   (number-of-difat-sectors dword))
    195 
    196 (define-structure ole-entry ()
    197   (name (wchar 32))
    198   (name-length ushort)
    199   (object-type ubyte)
    200   (color-flag ubyte)
    201   (left-sibling-id dword)
    202   (right-sibling-id dword)
    203   (child-id dword)
    204   (clsid guid)
    205   (state-bits dword)
    206   (creation-time filetime)
    207   (modified-time filetime)
    208   (starting-sector-location dword)
    209   (stream-size ulonglong))
    210 
    211 (defun string-from-achars (achars &optional length) ;; TODO encoding?
    212   (let* ((n (or length (length achars)))
    213          (s (make-string n)))
    214     (dotimes (i n s)
    215       (let ((c (aref achars i)))
    216         (assert (plusp c))
    217         (setf (aref s i) (code-char c))))))
    218 
    219 (defun string-from-wchars (wchars &optional length) ;; TODO encoding?
    220   (let* ((n (or length (length wchars)))
    221          (s (make-string n)))
    222     (dotimes (i n s)
    223       (let ((c (aref wchars i)))
    224         (assert (plusp c))
    225         (setf (aref s i) (code-char c))))))
    226 
    227 (defun string-from-octets (octets fHighByte &optional nbytes) ;; TODO encoding?
    228   (if fHighByte
    229       (multiple-value-bind (n m) (floor (or nbytes (length octets)) 2)
    230         (assert (zerop m))
    231         (let ((s (make-string n)))
    232           (dotimes (i n s)
    233             (let ((c (let ((2*i (ash i 1)))
    234                        (+ (aref octets 2*i)
    235                           (ash (aref octets (1+ 2*i)) 8)))))
    236               (assert (plusp c))
    237               (setf (aref s i) (code-char c))))))
    238       (string-from-achars octets nbytes)))
    239 
    240 (defun ole-entry-name-to-string (octets n)
    241   (multiple-value-bind (n m) (floor n 2)
    242     (assert (zerop m))
    243     (string-from-achars octets (1- n)))) ;; minus #\null
    244 
    245 (defun print-ole-entry (ole-entry stream)
    246   (print-unreadable-object (ole-entry stream :type t :identity t)
    247     (format stream "~s ~a ~a ~sB @~s"
    248             (ole-entry-name-to-string (ole-entry.name ole-entry)
    249                                       (ole-entry.name-length ole-entry))
    250             (ecase (ole-entry.object-type ole-entry)
    251               (0 "unknown")
    252               (1 "storage")
    253               (2 "stream")
    254               (5 "root"))
    255             (ecase (ole-entry.color-flag ole-entry)
    256               (0 "red")
    257               (1 "black"))
    258             (ole-entry.stream-size ole-entry)
    259             (ole-entry.starting-sector-location ole-entry))))
    260 
    261 (defun location-position (location)
    262   (* (1+ location) 512))
    263 
    264 (defun seek-sector (location stream)
    265   (let ((position (location-position location)))
    266     (assert (stream-position stream position))
    267     location))
    268 
    269 (defun check-ole-header (x)
    270   (assert (equalp #(#xd0 #xcf #x11 #xe0 #xa1 #xb1 #x1a #xe1) (ole-header.signature x)))
    271   ;;(assert (equalp clsid-null (ole-header.clsid x)))
    272   (assert (eql #xfffe (ole-header.byte-order x)))
    273   (assert (equalp #(0 0 0 0 0 0) (ole-header.reserved x)))
    274   ;; TODO
    275   (assert (eql 3 (ole-header.major-version x)))
    276   (assert (eql 512 (ash 1 (ole-header.sector-shift x))))
    277   (assert (eql 64 (ash 1 (ole-header.mini-sector-shift x))))
    278   (assert (eql 0 (ole-header.number-of-directory-sectors x)))
    279   ;;(assert (eql #xfffffffe (first-directory-sector-location x)))
    280   (assert (eql 0 (ole-header.transaction-signature-number x)))
    281   (assert (eql 4096 (ole-header.mini-stream-cutoff-size x)))
    282   ;;(assert (eql #xfffffffe (first-mini-fat-sector-location x)))
    283   (unless (plusp (ole-header.number-of-difat-sectors x))
    284     (assert (eql #xfffffffe (ole-header.first-difat-sector-location x)))))
    285 
    286 (defun sector-chain (fat location)
    287   (labels ((rec (x)
    288              (unless (member x (list +difsect+ +fatsect+ +endofchain+ +freesect+))
    289                (assert (and #+nil(< +unused-sector+ x) (<= 0 x +maxregsect+)))
    290                (cons x (rec (aref fat x))))))
    291     (rec location)))
    292 
    293 (defun read-values (array reader stream &optional (start 0) end)
    294   (loop
    295      for i from start below (or end (length array))
    296      do (setf (aref array i) (funcall reader stream))))
    297 
    298 (defun read-difat (header stream)
    299   (let ((x (make-array (+ 109
    300                           (* #.(/ (- 512 4) 4)
    301                              (ole-header.number-of-difat-sectors header)))
    302                        :element-type 'dword)))
    303     (read-values x 'read-dword stream 0 109)
    304     (loop
    305        with m = #.(1- (/ 512 4))
    306        for n = (ole-header.first-difat-sector-location header)
    307        then (read-dword stream)
    308        for i = 109 then (+ m i)
    309        until (= +endofchain+ n)
    310        do (progn
    311             (seek-sector n stream)
    312             (read-values x 'read-dword stream i (+ m i))))
    313     x))
    314 
    315 (defun read-fat (difat stream)
    316   (let* ((m #.(/ 512 4))
    317          (n (length difat))
    318          (x (make-array (* m n) :element-type 'dword)))
    319     (dotimes (i n x)
    320       (let ((s (aref difat i)))
    321         (unless (= +freesect+ s)
    322           (seek-sector s stream)
    323           (read-values x 'read-dword stream (* m i) (* m (1+ i))))))))
    324 
    325 (defun read-directories (chain stream)
    326   (let* ((m #.(/ 512 128))
    327          (x (make-array (* m (length chain))
    328                         :element-type '(or null ole-entry)
    329                         :initial-element nil))
    330          (i -1))
    331     (dolist (s chain x)
    332       (seek-sector s stream)
    333       (dotimes (j m)
    334         (setf (aref x (incf i)) (read-ole-entry stream))))))
    335 
    336 (defun read-mfat (chain stream)
    337   (let* ((m #.(/ 512 4))
    338          (x (make-array (* m (length chain)) :element-type 'dword))
    339          (i -1))
    340     (dolist (s chain x)
    341       (seek-sector s stream)
    342       ;;TODO block read (read-values x 'read-dword stream (* m i) (* m (1+ i)))
    343       (dotimes (j m)
    344         (setf (aref x (incf i)) (read-dword stream))))))
    345 
    346 (defun %ole-entry-stream (header fat directories mfat stream ole-entry)
    347   (let* ((offset 0)
    348          (mini (< (ole-entry.stream-size ole-entry)
    349                   (ole-header.mini-stream-cutoff-size header)))
    350          (chain (let ((x (sector-chain
    351                           fat
    352                           (ole-entry.starting-sector-location
    353                            (if mini
    354                                (aref directories 0)
    355                                ole-entry)))))
    356                   (when x
    357                     (coerce x 'vector))))
    358          (mchain (when mini
    359                    (let ((x (sector-chain
    360                              mfat
    361                              (ole-entry.starting-sector-location ole-entry))))
    362                      (when x
    363                        (coerce x 'vector)))))
    364          sector
    365          (buffer (make-array 512 :element-type '(unsigned-byte 8)))
    366          (size (ole-entry.stream-size ole-entry))
    367          self)
    368     (setq self
    369           (lambda (msg &rest args)
    370             (assert stream)
    371             (flet ((next-octet (consumep)
    372                      ;; (values <current-byte> <position-of-current-byte>)
    373                      ;; Advance the stream by a byte if CONSUMEP is true, except at eof.
    374                      (assert (not (minusp offset)))
    375                      (unless (< offset size)
    376                        (error 'end-of-file :stream self))
    377                      (flet ((pick (q i)
    378                               (unless (eql sector q)
    379                                 (seek-sector (aref chain q) stream)
    380                                 (let ((n (read-sequence buffer stream)))
    381                                   (assert (eql 512 n)))
    382                                 (setq sector q))
    383                               (multiple-value-prog1
    384                                   (values (aref buffer i)
    385                                           (+ i (location-position (aref chain sector))))
    386                                 (when consumep
    387                                   (incf offset)))))
    388                        (if mchain
    389                            (multiple-value-bind (mq mr) (floor offset 64)
    390                              (multiple-value-bind (q r) (floor (aref mchain mq) (/ 512 64))
    391                                (pick q (+ (* r 64) mr))))
    392                            (multiple-value-bind (q r) (floor offset 512)
    393                              (pick q r))))))
    394               (ecase msg
    395                 (close (setq stream nil))
    396                 (stream-position
    397                  (destructuring-bind (&optional newpos) args
    398                    (if newpos
    399                        (setf offset newpos
    400                              sector nil)
    401                        offset)))
    402                 (physical-stream-position (nth-value 1 (next-octet nil)))
    403                 (read-octet (values (next-octet t)))))))))
    404 
    405 (defun ole-entry-stream (ole-file entry)
    406   (funcall ole-file 'ole-entry-stream entry))
    407 
    408 (defun ole-directory-stream (ole-file)
    409   (funcall ole-file 'ole-directory-stream))
    410 
    411 (defun find-ole-entry (ole-file &key name type)
    412   (loop
    413      with s = (ole-directory-stream ole-file)
    414      with e = nil
    415      while (setq e (funcall s))
    416      do (when (and (or (not type)
    417                        (let ((x (ole-entry.object-type e)))
    418                          (if (atom type)
    419                              (eql x type)
    420                              (member x type))))
    421                    (or (not name)
    422                        (let ((x (ole-entry-name-to-string
    423                                  (ole-entry.name e)
    424                                  (ole-entry.name-length e))))
    425                          (if (atom name)
    426                              (equal x name)
    427                              (member x name :test #'equal)))))
    428           (return-from find-ole-entry e))))
    429 
    430 (defun %ole-directory-stream (directories)
    431   (let ((pending (list (cons 0 0))))
    432     (lambda ()
    433       (block done
    434         (loop
    435            (if pending
    436                (destructuring-bind (n &rest level) (pop pending)
    437                  (let ((e (aref directories n)))
    438                    (unless (zerop (ole-entry.object-type e))
    439                      (let ((id (ole-entry.right-sibling-id e)))
    440                        (when (<= id +maxregsig+)
    441                          (push (cons id level) pending)))
    442                      (let ((id (ole-entry.child-id e)))
    443                        (when (<= id +maxregsig+)
    444                          (push (cons id (1+ level)) pending)))
    445                      (let ((id (ole-entry.left-sibling-id e)))
    446                        (when (<= id +maxregsig+)
    447                          (push (cons id level) pending)))
    448                      (return-from done (values e n level)))))
    449                (return-from done)))))))
    450 
    451 (defun ole-file-stream (filename)
    452   (let* ((stream (open filename :element-type '(unsigned-byte 8)))
    453          (header (read-ole-header stream))
    454          (difat (read-difat header stream))
    455          (fat (read-fat difat stream))
    456          (directory-chain (sector-chain
    457                            fat
    458                            (ole-header.first-directory-sector-location header)))
    459          (directories (read-directories directory-chain stream))
    460          (mfat-chain (sector-chain
    461                       fat
    462                       (ole-header.first-mini-fat-sector-location header)))
    463          (mfat (read-mfat mfat-chain stream)))
    464     (check-ole-header header)
    465     ;;(describe header)
    466     ;;(terpri)
    467     (lambda (msg &rest args)
    468       (assert stream)
    469       (ecase msg
    470         (close
    471          (close stream)
    472          (setq stream nil))
    473         (ole-entry-stream
    474          (destructuring-bind (entry) args
    475            (%ole-entry-stream header fat directories mfat stream entry)))
    476         (ole-directory-stream (%ole-directory-stream directories))))))
    477 
    478 (defun extract-pictures (ole-file dir html)
    479   (walk-RecordHeader-tree
    480    ole-file
    481    (find-ole-entry ole-file :name "Pictures" :type 2)
    482    (lambda (in level i h start end parents)
    483      (declare (ignore level start end parents))
    484      (multiple-value-bind (blip kind)
    485          (read-record-body
    486           in
    487           h
    488           (lambda (blip in)
    489             (with-open-file (out (format nil "~a/~d.~a"
    490                                          dir
    491                                          i
    492                                          (blip-ext blip))
    493                                  :direction :output
    494                                  :if-does-not-exist :create
    495                                  :if-exists :supersede
    496                                  :element-type '(unsigned-byte 8))
    497               (copy-stream in out))))
    498        (declare (ignore blip))
    499        (when html
    500          (format html "<p><img src=\"~d.~(~a~)\">~%" i kind))))))
    501 
    502 (defun extract-ole-file (filename &optional (dir "/tmp")) ;; TODO extract audio files
    503   (with-stream (ole-file (ole-file-stream filename))
    504     (with-open-file (html (format nil "~a/index.html" dir)
    505                           :direction :output
    506                           :if-does-not-exist :create
    507                           :if-exists :supersede
    508                           :element-type 'character)
    509       (extract-pictures ole-file dir html))))
    510 
    511 ;;; MS-PPT PowerPoint (.ppt) Binary File Format
    512 
    513 (define-structure RecordHeader ()
    514   (%dummy1 ubyte)
    515   (%dummy2 ubyte)
    516   (recVer t :compute (logand #x0f %dummy1))
    517   (recInstance t :compute (logior (ash %dummy2 4) (ash %dummy1 -4)))
    518   (recType ushort)
    519   (recLen dword))
    520 
    521 (define-structure CurrentUserAtom ()
    522   (size dword :always #x14)
    523   (headerToken dword)
    524   (offsetToCurrentEdit dword)
    525   (lenUserName ushort)
    526   (docFileVersion ushort)
    527   (majorVersion ubyte)
    528   (minorVersion ubyte)
    529   (unused ushort)
    530   (ansiUserName (achar lenUserName))
    531   (relVersion dword)
    532   (unicodeUserName (wchar lenUserName)))
    533 
    534 ;;; MS-ODRAW Office Drawing Binary File Format
    535 
    536 (define-structure POINT ()
    537   (x dword)
    538   (y dword))
    539 
    540 (define-structure RECT ()
    541   (left dword)
    542   (top dword)
    543   (right dword)
    544   (bottom dword))
    545 
    546 (define-structure OfficeArtMetafileHeader ()
    547   (cbSize dword)
    548   (rcBounds RECT)
    549   (ptSize POINT)
    550   (cbSave dword)
    551   (compression ubyte :member '(#x00 #xfe))
    552   (filter ubyte :always #xfe))
    553 
    554 (define-structure PersistDirectoryEntry ()
    555   (%dummy dword)
    556   (persistId t :compute (logand #xfffff %dummy))
    557   (cPersist t :compute (ash %dummy -20))
    558   (rgPersistOffset (dword cPersist)))
    559 
    560 (defstruct blip header ext guid guid2 metafileHeader)
    561 
    562 (defstruct PersistDirectoryAtom header entries)
    563 
    564 (defun read-record-body (stream RecordHeader &optional fn) ;; TODO move up?!
    565   (let ((x RecordHeader #+nil(read-RecordHeader stream)))
    566     (with-slots (recVer recInstance recType recLen) x
    567       (flet ((blip (ext guid2 &optional metafileHeader)
    568                (with-stream (in (shorter-stream stream (RecordHeader.recLen x)))
    569                  (let* ((start (stream-position stream))
    570                         (end (+ start (RecordHeader.recLen x)))
    571                         (y (make-blip
    572                             :header x
    573                             :ext ext
    574                             :guid (read-guid in)
    575                             :guid2 (when (member recInstance guid2)
    576                                      (read-guid in))
    577                             :metafileHeader (if metafileHeader
    578                                                 (read-OfficeArtMetafileHeader in)
    579                                                 (read-octet in)))))
    580                    (when fn
    581                      (funcall fn y in))
    582                    (unless (eql end (stream-position stream))
    583                      (stream-position stream end))
    584                    y))))
    585         (ecase recType
    586           (#.RT_CurrentUserAtom
    587            (assert (zerop recVer))
    588            (assert (zerop recInstance))
    589            (list x (read-CurrentUserAtom stream))
    590            #+nil ;; why recLen too small?
    591            (with-shorter-stream (in stream (RecordHeader.recLen x))
    592              (list x (read-CurrentUserAtom in))))
    593           (#.RT_UserEditAtom
    594            (assert (zerop recVer))
    595            (assert (zerop recInstance))
    596            (with-stream (in (shorter-stream stream (RecordHeader.recLen x)))
    597              (list x (read-UserEditAtom in))))
    598           (#.RT_PersistDirectoryAtom
    599            (assert (zerop recVer))
    600            (assert (zerop recInstance))
    601            (let ((n (RecordHeader.recLen x)))
    602              ;;(print n)
    603              (with-stream (in (shorter-stream stream n))
    604                (make-PersistDirectoryAtom
    605                 :header x
    606                 :entries (loop
    607                             for fpos = 0 then (stream-position in)
    608                             while (< fpos n)
    609                             collect (progn
    610                                       ;;(print fpos)
    611                                       (read-PersistDirectoryEntry in)))))))
    612           #+nil
    613           (#.RT_Document ;; TODO
    614            )
    615           (#.RT_OfficeArtBlipEMF
    616            (assert (zerop recVer))
    617            (assert (member recInstance '(#x3d4 #x3d5)))
    618            (values (blip "emf" '(#x3d5) t) :emf))
    619           (#.RT_OfficeArtBlipWMF
    620            (assert (zerop recVer))
    621            (assert (member recInstance '(#x216 #x217)))
    622            (values (blip "wmf" '(#x217) t) :wmf))
    623           (#.RT_OfficeArtBlipPICT
    624            (assert (zerop recVer))
    625            (assert (member recInstance '(#x542 #x543)))
    626            (values (blip "pict" '(#x543) t) :pict))
    627           (#.RT_OfficeArtBlipJPEG1
    628            (assert (zerop recVer))
    629            (assert (member recInstance '(#x46A #x46B #x6E2 #x6E3)))
    630            (values (blip "jpeg" '(#x46B #x6E3)) :jpeg))
    631           (#.RT_OfficeArtBlipPNG
    632            (assert (zerop recVer))
    633            (assert (member recInstance '(#x6e0 #x6e1)))
    634            (values (blip "png"'(#x6e1)) :png))
    635           (#.RT_OfficeArtBlipDIB
    636            (assert (zerop recVer))
    637            (assert (member recInstance '(#x7a8 #x7a9)))
    638            (values (blip "dib" '(#x7a9)) :dib))
    639           (#.RT_OfficeArtBlipTIFF
    640            (assert (zerop recVer))
    641            (assert (member recInstance '(#x6e4 #x6e5)))
    642            (values (blip "tiff" '(#x6e5)) :tiff))
    643           (#.RT_OfficeArtBlipJPEG2
    644            (assert (zerop recVer))
    645            (assert (member recInstance '(#x46A #x46B #x6E2 #x6E3)))
    646            (values (blip "jpeg" '(#x46B #x6E3)) :jpeg)))))))
    647 
    648 (defun read-record (stream &optional fn)
    649   (read-record-body stream (read-RecordHeader stream) fn))
    650 
    651 (defun walk-RecordHeader-tree (ole-file entry fn &optional post-fn)
    652   (when entry
    653     (with-stream (in (ole-entry-stream ole-file entry))
    654       (labels ((rec (level pos parents)
    655                  (handler-case
    656                      (loop
    657                         for i from 0
    658                         until (<= 1 pos (stream-position in))
    659                         do (let* ((h (read-RecordHeader in))
    660                                   (start (stream-position in))
    661                                   (end (+ start (RecordHeader.recLen h))))
    662                              (funcall fn in level i h start end parents)
    663                              (if (= #xf (RecordHeader.recVer h))
    664                                  (rec (1+ level)
    665                                       (if (plusp pos)
    666                                           (min pos end)
    667                                           end)
    668                                       (cons h parents))
    669                                  (stream-position in end))
    670                              (when post-fn
    671                                (funcall post-fn in level i h start end parents))))
    672                    (end-of-file ()
    673                      (assert (zerop level))))))
    674         (rec 0 0 nil)))))
    675 
    676 (defun print-RecordHeader-tree (ole-file entry)
    677   (walk-RecordHeader-tree
    678    ole-file
    679    entry
    680    (lambda (in level i h start end parents)
    681      (declare (ignore in parents))
    682      (dotimes (j (* 2 level))
    683        (write-char #\space))
    684      (format t "~d #x~x #x~x #x~x ~d :: ~d ~d :: ~a~%"
    685              i
    686              (RecordHeader.recVer h)
    687              (RecordHeader.recInstance h)
    688              (RecordHeader.recType h)
    689              (RecordHeader.recLen h)
    690              start
    691              end
    692              (enum-by-value 'RecordType (RecordHeader.recType h))))))
    693 
    694 (defun print-RecordHeader-tree-from-ppt-file (filename)
    695   (with-stream (ole-file (ole-file-stream filename))
    696     (print-RecordHeader-tree
    697      ole-file
    698      (find-ole-entry ole-file :name "PowerPoint Document" :type 2))))
    699 
    700 (defun utf-char (n)                ;; TODO utf properly
    701   (assert (plusp n))
    702   (if (member n '(#x0a #x0b #x0d)) ;; #x0b = vertical tab
    703       "<br/>"
    704       (code-char n)))
    705 
    706 (defun ascii-char (n)
    707   (assert (plusp n))
    708   (if (member n '(#x0a #x0b #x0d)) ;; #x0b = vertical tab
    709       "<br/>"
    710       (code-char n)))
    711 
    712 (define-structure OfficeArtFOPTEOPID ()
    713   (%dummy ushort)
    714   (opid t :compute (logand #x3fff %dummy))
    715   (fBid t :compute (not (zerop (logand #x4000 %dummy))))
    716   (fComplex t :compute (not (zerop (logand #x8000 %dummy)))))
    717 
    718 (define-structure OfficeArtFBSE ()
    719   (btWin32 ubyte)
    720   (btMacOS ubyte)
    721   (rgbUid GUID)
    722   (tag ushort)
    723   (size dword)
    724   (cRef dword)
    725   (foDelay dword)
    726   (unused1 ubyte)
    727   (cbName ubyte)
    728   (unused2 ubyte)
    729   (unused3 ubyte)
    730   #+nil(nameData (ubyte cbName))
    731   #+nil(embeddedBlip (ubyte size)))
    732 
    733 (defun ppt-entry-to-html-naive (ole-file entry stream title pictures debug)
    734   (macrolet ((out (&rest args)
    735                `(format stream ,@args)))
    736     (let ((slide-no 0)
    737           (blip-no 0)
    738           (blips nil)
    739           ;; texts
    740           (text-slide-no nil)
    741           (text-no nil)
    742           (texts nil))
    743       (out "<html>~%<head>~%")
    744       (when title
    745         (out "<title>~a</title>~%" title))
    746       (out "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>~%")
    747       (out "<style>~%")
    748       (out ".h {border-left:1px solid gray;padding-left:0.5em}~%")
    749       (out ".m {color:gray}")
    750       (out "</style>~%")
    751       (out "</head>~%<body>~%")
    752       (when title
    753         (out "<a href=\"file://~a\">~a</a>~%" title title))
    754       (walk-RecordHeader-tree
    755        ole-file
    756        entry
    757        (lambda (in level i h start end parents)
    758          (declare (ignore end parents))
    759          (when debug
    760            ;; pre
    761            (when (and (zerop level) (plusp i))
    762              (out "<hr/>~%"))
    763            ;; msg
    764            (when debug
    765              (out "<div class=\"h\">~%<pre class=\"m\">~a ~a #x~x ~a</pre>~%"
    766                   (- start 8) ;; - record header size
    767                   (RecordHeader.recType h)
    768                   (RecordHeader.recType h)
    769                   (enum-by-value 'RecordType (RecordHeader.recType h)))))
    770          ;; post
    771          (case (RecordHeader.recType h)
    772            (#.RT_Document)
    773            (#.RT_SlideListWithText
    774             (setq text-slide-no 0))
    775            (#.RT_SlidePersistAtom
    776             (incf text-slide-no)
    777             (setq text-no 0))
    778            (#.RT_OfficeArtFBSE
    779             (let* ((x (read-OfficeArtFBSE in))
    780                    (y (find (OfficeArtFBSE.foDelay x) pictures :key #'cadr)))
    781               (assert y)
    782               (push (list (incf blip-no) (car y) (caddr y)) blips)
    783               #+nil
    784               (out "<div><p>@@@ ~a #x~x ~a === img ~s ~s</p>~%"
    785                    (RecordHeader.recType h)
    786                    (RecordHeader.recType h)
    787                    (enum-by-value 'RecordType (RecordHeader.recType h))
    788                    blip-no
    789                    (OfficeArtFBSE.foDelay x))))
    790            (#.RT_Slide
    791             (incf slide-no)
    792             (unless debug
    793               (when (< 1 slide-no)
    794                 (out "<hr/>~%")))
    795             (out "<div class=\"slide\">~%")
    796             (out "<h1><a name=\"slide~d\">Slide ~d</a></h1>~%" slide-no slide-no)
    797             (out "<pre><a href=\"#slide~d\">&lt;</a> <a href=\"#slide~d\">&gt;</a></pre>~%" (1- slide-no) (1+ slide-no)))
    798            ((#.RT_TextCharsAtom #.RT_CString) ;; utf16le
    799             (unless nil #+nil(or (member #.RT_PROGTAGS parents :key 'RecordHeader.recType)
    800                         (member #.RT_NOTES parents :key 'RecordHeader.recType)
    801                         (member #.RT_MAINMASTER parents :key 'RecordHeader.recType))
    802               (cond
    803                 #+nil
    804                 ((member #.RT_SlideListWithText parents :key 'RecordHeader.recType)
    805                  (push ;; TODO also slide-no + text-no inside slide
    806                   (list
    807                    text-slide-no
    808                    (incf text-no)
    809                    (with-output-to-string (s)
    810                      (loop
    811                         for j from 0 below (RecordHeader.recLen h) by 2
    812                         do (format s "~a" (utf-char (read-ushort in))))))
    813                   texts))
    814                 (t
    815                  (out "<p>")
    816                  (loop
    817                     for j from 0 below (RecordHeader.recLen h) by 2
    818                     do (out "~a" (utf-char (read-ushort in))))
    819                  (out "</p>~%")))))
    820            (#.RT_TextBytesAtom ;; ascii
    821             (unless nil #+nil(or (member #.RT_PROGTAGS parents :key 'RecordHeader.recType)
    822                         (member #.RT_NOTES parents :key 'RecordHeader.recType)
    823                         (member #.RT_MAINMASTER parents :key 'RecordHeader.recType))
    824               (cond
    825                 #+nil
    826                 ((member #.RT_SlideListWithText parents :key 'RecordHeader.recType)
    827                  (push ;; TODO also slide-no + text-no inside slide
    828                   (list
    829                    text-slide-no
    830                    (incf text-no)
    831                    (with-output-to-string (s)
    832                      (loop
    833                         for j from 0 below (RecordHeader.recLen h)
    834                         do (format s "~a" (ascii-char (read-octet in))))))
    835                   texts))
    836                 (t
    837                  (out "<p>")
    838                  (loop
    839                     for j from 0 below (RecordHeader.recLen h)
    840                     do (out "~a" (ascii-char (read-octet in))))
    841                  (out "</p>~%")))))
    842            (#.RT_OUTLINETEXTREFATOM
    843             (let* ((index (1+ (read-dword in)))
    844                    (text (caddr
    845                           (find-if (lambda (x)
    846                                      (and (= slide-no (car x))
    847                                           (= index (cadr x))))
    848                                    texts))))
    849               (if text
    850                   (out "<p>~a</p>~%" text)
    851                   (out "<p>!!!</p>~%"))))
    852            ;; TODO RT_DOCUMENT / RT_SLIDELISTWITHTEXT / RT_TEXTBYTESATOM
    853            (#.RT_OfficeArtFOPT
    854             (with-stream (s (shorter-stream in (RecordHeader.recLen h)))
    855               (let ((len (RecordHeader.recLen h)))
    856                 (loop
    857                    while (< (stream-position s) len)
    858                    do (let ((opid (read-OfficeArtFOPTEOPID s))
    859                             (value (read-dword s)))
    860                         ;;(out "<p>...... ~s ~s</p>~%" opid value)
    861                         (when (OfficeArtFOPTEOPID.fComplex opid)
    862                           (decf len value))
    863                         (case (OfficeArtFOPTEOPID.opid opid)
    864                           (#.pib
    865                            (assert (OfficeArtFOPTEOPID.fBid opid))
    866                            (destructuring-bind (j n ext) (assoc value blips)
    867                              (assert (and j n ext))
    868                              (out "<img src=\"~a.~(~a~)\"/>~%" n ext)))))))))))
    869        (lambda (in level i h start end parents)
    870          (declare (ignore in level i start end parents))
    871          (case (RecordHeader.recType h)
    872            (#.RT_Slide
    873             (out "</div>~%")))
    874          (when debug
    875            (format stream "</div>~%"))))
    876       ;;(out "~s~%" texts)
    877       (out "</body>~%</html>~%"))))
    878 
    879 (defun ppt-file-to-html-naive (filename &optional (stream *standard-output*))
    880   (with-stream (ole-file (ole-file-stream filename))
    881     (let ((pictures nil))
    882       ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once
    883       (walk-RecordHeader-tree ole-file
    884                               (find-ole-entry ole-file :name "Pictures" :type 2)
    885                               (lambda (in level i h start end parents)
    886                                 (declare (ignore level end parents))
    887                                 (multiple-value-bind (blip kind)
    888                                     (read-record-body in h)
    889                                   (declare (ignore blip))
    890                                   (push (list i (- start 8) kind) pictures))))
    891       (ppt-entry-to-html-naive ole-file
    892                                (find-ole-entry ole-file
    893                                                :name "PowerPoint Document"
    894                                                :type 2)
    895                                stream
    896                                filename
    897                                pictures
    898                                t))))
    899 
    900 (define-structure UserEditAtom ()
    901   (lastSlideIdRef dword)
    902   (version ushort)
    903   (minorVersion ubyte :always 0)
    904   (majorVersion ubyte :always 3)
    905   (offsetLastEdit dword)
    906   (offsetPersistDirectory dword)
    907   (docPersistIdRef dword :always 1)
    908   (persistIdSeed dword)
    909   (lastView ushort)
    910   (unused ushort)
    911   #+nil(encryptSessionPersistIdRef dword)) ;; TODO optional
    912 
    913 (defun ppt-entry-to-html (ole-file entry stream title)
    914   (macrolet ((out (&rest args)
    915                `(format stream ,@args)))
    916     (let ((slide-no 0))
    917       (out "<html>~%<head>~%")
    918       (when title
    919         (out "<title>~a</title>~%" title))
    920       (out "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>~%")
    921       (out "</head>~%<body>~%")
    922       (walk-RecordHeader-tree
    923        ole-file
    924        entry
    925        (lambda (in level i h start end parents)
    926          (declare (ignore i level start end parents))
    927          (case (RecordHeader.recType h)
    928            (#.RT_Document
    929             (out "<div>~%"))
    930            (#.RT_Slide
    931             (out "<hr/>~%</div>~%<div class=\"slide\">~%<h1>Slide ~d</h1>~%" (incf slide-no)))
    932            ((#.RT_TextCharsAtom #.RT_CString) ;; utf16le
    933             (out "<p>")
    934             (loop
    935                for j from 0 below (RecordHeader.recLen h) by 2
    936                do (out "~a" (utf-char (read-ushort in))))
    937             (out "</p>~%"))
    938            (#.RT_TextBytesAtom ;; ascii
    939             (out "<p>")
    940             (loop
    941                for j from 0 below (RecordHeader.recLen h)
    942                do (out "~a" (ascii-char (read-octet in))))
    943             (out "</p>~%")))))
    944       (out "</div>~%</body>~%</html>~%"))))
    945 
    946 (defun process-PersistDirectoryAtom (htab in)
    947   (dolist (entry (PersistDirectoryAtom-entries (read-record in)))
    948     (with-slots (persistId cPersist rgPersistOffset) entry
    949       (loop
    950          for n from 0
    951          for o across rgPersistOffset
    952          do (let ((k (+ persistId n)))
    953               ;;(print (list :??? persistId :+ n := k :-> o))
    954               (unless (gethash k htab)
    955                 ;;(print (list persistId :+ n := k :-> o))
    956                 (setf (gethash k htab) o)))))))
    957 
    958 (defun ppt-file-to-html (filename &optional (stream *standard-output*))
    959   (with-stream (ole-file (ole-file-stream filename))
    960     (let ((u (block CurrentUser
    961                (walk-RecordHeader-tree
    962                 ole-file
    963                 (find-ole-entry ole-file :name "Current User" :type 2)
    964                 (lambda (in level i h start end parents)
    965                   (declare (ignore level i start end parents))
    966                   (return-from CurrentUser
    967                     (cadr (read-record-body in h))))))))
    968       ;;(describe u)
    969       (let ((pictures nil))
    970         ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once
    971         (walk-RecordHeader-tree ole-file
    972                                 (find-ole-entry ole-file
    973                                                 :name "Pictures"
    974                                                 :type 2)
    975                                 (lambda (in level i h start end parents)
    976                                   (declare (ignore level end parents))
    977                                   (multiple-value-bind (blip kind)
    978                                       (read-record-body in h)
    979                                     (declare (ignore blip))
    980                                     (push (list i (- start 8) kind) pictures))))
    981         (print (list :pictures pictures))
    982         (with-stream (in (ole-entry-stream
    983                           ole-file
    984                           (find-ole-entry ole-file
    985                                           :name "PowerPoint Document"
    986                                           :type 2)))
    987           (let ((htab (make-hash-table)) ;; persist oid -> fpos
    988                 (first-UserEditAtom nil))
    989             (stream-position in (CurrentUserAtom.offsetToCurrentEdit u))
    990             (loop
    991                for e = (cadr (read-record in)) then (cadr (read-record in))
    992                do (progn
    993                     ;;(describe e)
    994                     (unless first-UserEditAtom
    995                       (setq first-UserEditAtom e))
    996                     (stream-position in (UserEditAtom.offsetPersistDirectory e))
    997                     (process-PersistDirectoryAtom htab in))
    998                until (zerop (UserEditAtom.offsetLastEdit e))
    999                do (stream-position in (UserEditAtom.offsetLastEdit e)))
   1000             ;; live PersistDirectory
   1001             (let ((persist-directory nil))
   1002               (maphash (lambda (k v) (push (cons k v) persist-directory)) htab)
   1003               (setq persist-directory (sort persist-directory #'< :key #'car))
   1004               (print persist-directory))
   1005             ;; live DocumentContainer
   1006             (print (gethash (UserEditAtom.docPersistIdRef first-UserEditAtom) htab)))
   1007           #+nil(stream-position in 0)
   1008           #+nil(print (read-record in)))))))
   1009 
   1010 ;;; MS-DOC Word (.doc) Binary File Format
   1011 
   1012 (define-structure FibBase ()
   1013   (wIdent ushort)
   1014   (nFib ushort)
   1015   (unused ushort)
   1016   (lid ushort)
   1017   (pnNext ushort)
   1018   (flags1 ushort) ;; TODO
   1019   (nFibBack ushort :member '(#xbf #xc1))
   1020   (lKey dword)
   1021   (envr ubyte)       ;; TODO :always 0?
   1022   (flags2 ubyte)     ;; TODO
   1023   (reserved3 ushort) ;; TODO :always 0?
   1024   (reserved4 ushort) ;; TODO :always 0?
   1025   (reserved5 dword)
   1026   (reserved6 dword))
   1027 
   1028 (define-structure FibRgFcLcb97 ()
   1029   (fcStshfOrig dword)
   1030   (lcbStshfOrig dword)
   1031   (fcStshf dword)
   1032   (lcbStshf dword)
   1033   (fcPlcffndRef dword)
   1034   (lcbPlcffndRef dword)
   1035   (fcPlcffndTxt dword)
   1036   (lcbPlcffndTxt dword)
   1037   (fcPlcfandRef dword)
   1038   (lcbPlcfandRef dword)
   1039   (fcPlcfandTxt dword)
   1040   (lcbPlcfandTxt dword)
   1041   (fcPlcfSed dword)
   1042   (lcbPlcfSed dword)
   1043   (fcPlcPad dword)
   1044   (lcbPlcPad dword)
   1045   (fcPlcfPhe dword)
   1046   (lcbPlcfPhe dword)
   1047   (fcSttbfGlsy dword)
   1048   (lcbSttbfGlsy dword)
   1049   (fcPlcfGlsy dword)
   1050   (lcbPlcfGlsy dword)
   1051   (fcPlcfHdd dword)
   1052   (lcbPlcfHdd dword)
   1053   (fcPlcfBteChpx dword)
   1054   (lcbPlcfBteChpx dword)
   1055   (fcPlcfBtePapx dword)
   1056   (lcbPlcfBtePapx dword)
   1057   (fcPlcfSea dword)
   1058   (lcbPlcfSea dword)
   1059   (fcSttbfFfn dword)
   1060   (lcbSttbfFfn dword)
   1061   (fcPlcfFldMom dword)
   1062   (lcbPlcfFldMom dword)
   1063   (fcPlcfFldHdr dword)
   1064   (lcbPlcfFldHdr dword)
   1065   (fcPlcfFldFtn dword)
   1066   (lcbPlcfFldFtn dword)
   1067   (fcPlcfFldAtn dword)
   1068   (lcbPlcfFldAtn dword)
   1069   (fcPlcfFldMcr dword)
   1070   (lcbPlcfFldMcr dword)
   1071   (fcSttbfBkmk dword)
   1072   (lcbSttbfBkmk dword)
   1073   (fcPlcfBkf dword)
   1074   (lcbPlcfBkf dword)
   1075   (fcPlcfBkl dword)
   1076   (lcbPlcfBkl dword)
   1077   (fcCmds dword)
   1078   (lcbCmds dword)
   1079   (fcUnused1 dword)
   1080   (lcbUnused1 dword)
   1081   (fcSttbfMcr dword)
   1082   (lcbSttbfMcr dword)
   1083   (fcPrDrvr dword)
   1084   (lcbPrDrvr dword)
   1085   (fcPrEnvPort dword)
   1086   (lcbPrEnvPort dword)
   1087   (fcPrEnvLand dword)
   1088   (lcbPrEnvLand dword)
   1089   (fcWss dword)
   1090   (lcbWss dword)
   1091   (fcDop dword)
   1092   (lcbDop dword)
   1093   (fcSttbfAssoc dword)
   1094   (lcbSttbfAssoc dword)
   1095   (fcClx dword)
   1096   (lcbClx dword)
   1097   (fcPlcfPgdFtn dword)
   1098   (lcbPlcfPgdFtn dword)
   1099   (fcAutosaveSource dword)
   1100   (lcbAutosaveSource dword)
   1101   (fcGrpXstAtnOwners dword)
   1102   (lcbGrpXstAtnOwners dword)
   1103   (fcSttbfAtnBkmk dword)
   1104   (lcbSttbfAtnBkmk dword)
   1105   (fcUnused2 dword)
   1106   (lcbUnused2 dword)
   1107   (fcUnused3 dword)
   1108   (lcbUnused3 dword)
   1109   (fcPlcSpaMom dword)
   1110   (lcbPlcSpaMom dword)
   1111   (fcPlcSpaHdr dword)
   1112   (lcbPlcSpaHdr dword)
   1113   (fcPlcfAtnBkf dword)
   1114   (lcbPlcfAtnBkf dword)
   1115   (fcPlcfAtnBkl dword)
   1116   (lcbPlcfAtnBkl dword)
   1117   (fcPms dword)
   1118   (lcbPms dword)
   1119   (fcFormFldSttbs dword)
   1120   (lcbFormFldSttbs dword)
   1121   (fcPlcfendRef dword)
   1122   (lcbPlcfendRef dword)
   1123   (fcPlcfendTxt dword)
   1124   (lcbPlcfendTxt dword)
   1125   (fcPlcfFldEdn dword)
   1126   (lcbPlcfFldEdn dword)
   1127   (fcUnused4 dword)
   1128   (lcbUnused4 dword)
   1129   (fcDggInfo dword)
   1130   (lcbDggInfo dword)
   1131   (fcSttbfRMark dword)
   1132   (lcbSttbfRMark dword)
   1133   (fcSttbfCaption dword)
   1134   (lcbSttbfCaption dword)
   1135   (fcSttbfAutoCaption dword)
   1136   (lcbSttbfAutoCaption dword)
   1137   (fcPlcfWkb dword)
   1138   (lcbPlcfWkb dword)
   1139   (fcPlcfSpl dword)
   1140   (lcbPlcfSpl dword)
   1141   (fcPlcftxbxTxt dword)
   1142   (lcbPlcftxbxTxt dword)
   1143   (fcPlcfFldTxbx dword)
   1144   (lcbPlcfFldTxbx dword)
   1145   (fcPlcfHdrtxbxTxt dword)
   1146   (lcbPlcfHdrtxbxTxt dword)
   1147   (fcPlcffldHdrTxbx dword)
   1148   (lcbPlcffldHdrTxbx dword)
   1149   (fcStwUser dword)
   1150   (lcbStwUser dword)
   1151   (fcSttbTtmbd dword)
   1152   (lcbSttbTtmbd dword)
   1153   (fcCookieData dword)
   1154   (lcbCookieData dword)
   1155   (fcPgdMotherOldOld dword)
   1156   (lcbPgdMotherOldOld dword)
   1157   (fcBkdMotherOldOld dword)
   1158   (lcbBkdMotherOldOld dword)
   1159   (fcPgdFtnOldOld dword)
   1160   (lcbPgdFtnOldOld dword)
   1161   (fcBkdFtnOldOld dword)
   1162   (lcbBkdFtnOldOld dword)
   1163   (fcPgdEdnOldOld dword)
   1164   (lcbPgdEdnOldOld dword)
   1165   (fcBkdEdnOldOld dword)
   1166   (lcbBkdEdnOldOld dword)
   1167   (fcSttbfIntlFld dword)
   1168   (lcbSttbfIntlFld dword)
   1169   (fcRouteSlip dword)
   1170   (lcbRouteSlip dword)
   1171   (fcSttbSavedBy dword)
   1172   (lcbSttbSavedBy dword)
   1173   (fcSttbFnm dword)
   1174   (lcbSttbFnm dword)
   1175   (fcPlfLst dword)
   1176   (lcbPlfLst dword)
   1177   (fcPlfLfo dword)
   1178   (lcbPlfLfo dword)
   1179   (fcPlcfTxbxBkd dword)
   1180   (lcbPlcfTxbxBkd dword)
   1181   (fcPlcfTxbxHdrBkd dword)
   1182   (lcbPlcfTxbxHdrBkd dword)
   1183   (fcDocUndoWord9 dword)
   1184   (lcbDocUndoWord9 dword)
   1185   (fcRgbUse dword)
   1186   (lcbRgbUse dword)
   1187   (fcUsp dword)
   1188   (lcbUsp dword)
   1189   (fcUskf dword)
   1190   (lcbUskf dword)
   1191   (fcPlcupcRgbUse dword)
   1192   (lcbPlcupcRgbUse dword)
   1193   (fcPlcupcUsp dword)
   1194   (lcbPlcupcUsp dword)
   1195   (fcSttbGlsyStyle dword)
   1196   (lcbSttbGlsyStyle dword)
   1197   (fcPlgosl dword)
   1198   (lcbPlgosl dword)
   1199   (fcPlcocx dword)
   1200   (lcbPlcocx dword)
   1201   (fcPlcfBteLvc dword)
   1202   (lcbPlcfBteLvc dword)
   1203   (dwLowDateTime dword)
   1204   (dwHighDateTime dword)
   1205   (fcPlcfLvcPre10 dword)
   1206   (lcbPlcfLvcPre10 dword)
   1207   (fcPlcfAsumy dword)
   1208   (lcbPlcfAsumy dword)
   1209   (fcPlcfGram dword)
   1210   (lcbPlcfGram dword)
   1211   (fcSttbListNames dword)
   1212   (lcbSttbListNames dword)
   1213   (fcSttbfUssr dword)
   1214   (lcbSttbfUssr dword))
   1215 
   1216 #+nil
   1217 (define-structure FibRgCswNew ()
   1218   (nFibNew ushort :member '(#x00D9 #x0101 #x010C #x0112))
   1219   rgCswNewData (variable): Depending on the value of nFibNew this is one of the following.
   1220   Value of nFibNew
   1221   Meaning
   1222   0x00D9
   1223   fibRgCswNewData2000 (2 bytes)
   1224   0x0101
   1225   fibRgCswNewData2000 (2 bytes)
   1226   0x010C
   1227   fibRgCswNewData2000 (2 bytes)
   1228   0x0112
   1229   fibRgCswNewData2007 (8 bytes) )
   1230 
   1231 (defstruct fib base csw fibRgW cslw fibRgLw cbRgFcLcb fibRgFcLcbBlob fibRgFcLcb
   1232            cswNew fibRgCswNew)
   1233 
   1234 (defun read-fib (stream)
   1235   (let* ((base (read-fibbase stream))
   1236          (csw (let ((x (read-ushort stream)))
   1237                 (assert (= x #x0e))
   1238                 x))
   1239          (fibRgW (read-vector stream 28 '(unsigned-byte 8) 'read-octet))
   1240          (cslw (let ((x (read-ushort stream)))
   1241                  (assert (= x #x16))
   1242                  x))
   1243          (fibRgLw (read-vector stream 88 '(unsigned-byte 8) 'read-octet))
   1244          (cbRgFcLcb (read-ushort stream))
   1245          (fibRgFcLcbBlob-position (stream-position stream))
   1246          (fibRgFcLcbBlob (read-vector stream (* 8 cbRgFcLcb) '(unsigned-byte 8) 'read-octet))
   1247          (cswNew (read-ushort stream))
   1248          (fibRgCswNew (read-vector stream cswNew '(unsigned-byte 8) 'read-octet))
   1249          #+nil
   1250          (nFib (if (zerop cswNew)
   1251                    (FibBase.nFib base)
   1252                    -1 #+nil(assert (zerop cswNew))))) ;; TODO implement this case
   1253     (assert
   1254      (member cbRgFcLcb '(#x5d #x6c #x88 #xa4 #xb7))
   1255      #+nil ;; spec says as bellow:-{
   1256      (= cbRgFcLcb (ecase nFib
   1257                     (#x0c1 #x5d) ;;;; < should be
   1258                     (#x0d9 #x6c)
   1259                     (#x101 #x88)
   1260                     (#x10c #xa4) ;;;; < actually is
   1261                     (#x112 #xb7))))
   1262     #+nil
   1263     (assert (= cswNew (ecase nFib
   1264                         (#x0c1 0)
   1265                         (#x0d9 2)
   1266                         (#x101 2)
   1267                         (#x10c 2)
   1268                         (#x112 5))))
   1269     ;;(print (list :@@@-nfib nFib))
   1270     (make-fib :base base
   1271               :csw csw
   1272               :fibRgW fibRgW
   1273               :cslw cslw
   1274               :fibRgLw fibRgLw
   1275               :cbRgFcLcb cbRgFcLcb
   1276               :fibRgFcLcbBlob fibRgFcLcbBlob
   1277               :fibRgFcLcb (with-stream (s (vector-stream fibRgFcLcbBlob
   1278                                                          fibRgFcLcbBlob-position))
   1279                             (read-FibRgFcLcb97 s))
   1280               :cswNew cswNew
   1281               :fibRgCswNew fibRgCswNew)))
   1282 
   1283 (define-structure LSTF ()
   1284   (lsid dword) ;; TODO signed, not -1 (or #xffffffff)
   1285   (tplc dword)
   1286   (rgistdPara (ushort 9))
   1287   (flags ubyte)
   1288   (fSimpleList t :compute (not (zerop (logand #x01 flags))))
   1289   (unused1 t :compute (not (zerop (logand #x02 flags))))
   1290   (fAutoNum t :compute (not (zerop (logand #x04 flags))))
   1291   (unused2 t :compute (not (zerop (logand #x08 flags))))
   1292   (fHybrid t :compute (not (zerop (logand #x10 flags))))
   1293   (reserved1 t :compute (logand #xe0 flags)) ;; TODO :always 0
   1294   (grfhic ubyte))
   1295 
   1296 (defun read-PlfLst (stream)
   1297   (let* ((cLst (read-ushort stream))
   1298          (z (make-array cLst)))
   1299     (dotimes (i cLst z)
   1300       (setf (aref z i) (read-lstf stream)))))
   1301 
   1302 (define-structure LVLF ()
   1303   (iStartAt dword) ;; TODO signed
   1304   (nfc ubyte) ;; TODO MUST not be equal to 0x08, 0x09, 0x0F, or 0x13
   1305   (flags ubyte)
   1306   (jc t :compute (logand #x03 flags))
   1307   (fLegal t :compute (not (zerop (logand #x04 flags))))
   1308   (fNoRestart t :compute (not (zerop (logand #x08 flags))))
   1309   (fIndentSav t :compute (not (zerop (logand #x10 flags))))
   1310   (fConverted t :compute (not (zerop (logand #x20 flags))))
   1311   (unused1 t :compute (not (zerop (logand #x40 flags))))
   1312   (fTentative t :compute (not (zerop (logand #x80 flags))))
   1313   (rgbxchNums (ubyte 9))
   1314   (ixchFollow ubyte)
   1315   (dxaIndentSav dword) ;; TODO signed
   1316   (unused2 dword)
   1317   (cbGrpprlChpx ubyte)
   1318   (cbGrpprlPapx ubyte)
   1319   (ilvlRestartLim ubyte)
   1320   (grfhic ubyte))
   1321 
   1322 (defstruct LVL lvlf grpprlPapx grpprlChpx xst)
   1323 
   1324 (define-structure Sprm ()
   1325   (flags ushort)
   1326   (ispmd t :compute (logand #x01ff flags))
   1327   (fSpec t :compute (not (zerop (logand #x0200 flags))))
   1328   (sgc t :compute (logand #x07 (ash flags -10)))
   1329   (spra t :compute (logand #x07 (ash flags -13))))
   1330 
   1331 (defstruct PChgTabsDelClose cTabs rgdxaDel rgdxaClose)
   1332 
   1333 (defun read-PChgTabsDelClose (stream)
   1334   (let ((cTabs (read-octet stream)))
   1335     (assert (<= 0 cTabs 64))
   1336     (let ((rgdxaDel (read-vector stream cTabs t 'read-ushort))
   1337           (rgdxaClose (read-vector stream cTabs t 'read-ushort)))
   1338       (assert (equalp rgdxaDel (sort (copy-seq rgdxaDel) #'<=)))
   1339       (make-PChgTabsDelClose :cTabs cTabs
   1340                              :rgdxaDel rgdxaDel
   1341                              :rgdxaClose rgdxaClose))))
   1342 
   1343 (defstruct PChgTabsAdd cTabs rgdxaAdd rgtbdAdd)
   1344 
   1345 (defun read-PChgTabsAdd (stream)
   1346   (let ((cTabs (read-octet stream)))
   1347     (assert (<= 0 cTabs 64))
   1348     (let ((rgdxaAdd (read-vector stream cTabs t 'read-ushort))
   1349           (rgtbdAdd (read-vector stream cTabs t 'read-octet))) ;; TODO decode TBD struct
   1350       (assert (equalp rgdxaAdd (sort (copy-seq rgdxaAdd) #'<=)))
   1351       (make-PChgTabsAdd :cTabs cTabs
   1352                         :rgdxaAdd rgdxaAdd
   1353                         :rgtbdAdd rgtbdAdd))))
   1354 
   1355 (defstruct PChgTabsOperand cb DelClose Add)
   1356 
   1357 (defun read-PChgTabsOperand (stream)
   1358   (let ((cb (read-octet stream)))
   1359     (assert (< 1 cb 255)) ;; TODO 255
   1360     ;;(read-vector stream cb t 'read-octet)
   1361     (make-PChgTabsOperand :cb cb
   1362                           :DelClose (read-PChgTabsDelClose stream)
   1363                           :Add (read-PChgTabsAdd stream))))
   1364 
   1365 (defstruct Prl sprm operand)
   1366 
   1367 (defun read-Prl (stream)
   1368   (let ((sprm (read-Sprm stream)))
   1369     ;; (when (zerop (Sprm.sgc sprm))
   1370     ;;   (print (list :@@@-!!! (read-vector stream 10 t 'read-octet))))
   1371     (assert (member (Sprm.sgc sprm) '(1 2 3 4 5)))
   1372     (make-Prl
   1373      :sprm sprm
   1374      :operand (ecase (Sprm.spra sprm)
   1375                 (0 (read-octet stream))
   1376                 (1 (read-octet stream))
   1377                 (2 (read-ushort stream))
   1378                 (3 (read-dword stream))
   1379                 (4 (read-ushort stream))
   1380                 (5 (read-ushort stream))
   1381                 (6 (flet ((rd ()
   1382                             (read-vector stream (read-octet stream) t 'read-octet)))
   1383                      (ecase (Sprm.sgc sprm)
   1384                        (1 (ecase (Sprm.flags sprm) ;; par
   1385                             (#xc615 (read-PChgTabsOperand stream))))
   1386                        (2 (rd))     ;; char
   1387                        (3 (rd))     ;; pic
   1388                        (4 (rd))     ;; sec
   1389                        #+nil(5 )))) ;; tab
   1390                 (7 (read-vector stream 3 t 'read-octet))))))
   1391 
   1392 (defstruct Xst blob parsed)
   1393 
   1394 (defun read-Xst (stream)
   1395   ;;(read-vector stream (read-ushort stream) t 'read-ushort)
   1396   (let* ((cch (read-ushort stream))
   1397          (blob (read-vector stream cch t 'read-ushort)))
   1398     (make-Xst :blob blob
   1399               :parsed nil
   1400               #+nil(with-output-to-string (out)
   1401                      (dotimes (i cch)
   1402                        (format out "~a" (utf-char (aref blob i))))))))
   1403 
   1404 (defun read-LVL (stream)
   1405   (let ((lvlf (read-lvlf stream)))
   1406     ;;(describe lvlf)
   1407     (make-LVL
   1408      :lvlf lvlf
   1409      :grpprlPapx (read-vector stream (LVLF.cbGrpprlPapx lvlf) t 'read-octet)
   1410      :grpprlChpx (read-vector stream (LVLF.cbGrpprlChpx lvlf) t 'read-octet)
   1411      ;; :grpprlPapx (read-vector stream (LVLF.cbGrpprlPapx lvlf) t 'read-prl)
   1412      ;; :grpprlChpx (read-vector stream (LVLF.cbGrpprlChpx lvlf) t 'read-prl)
   1413      :xst (read-Xst stream))))
   1414 
   1415 (defun fix-numbering (filename)
   1416   (let (offsets)
   1417     (with-stream (ole-file (ole-file-stream filename))
   1418       #+nil(break "~s" ole-file)
   1419       (let (fcPlfLst lcbPlfLst)
   1420         (with-stream (in (ole-entry-stream
   1421                           ole-file
   1422                           (find-ole-entry ole-file
   1423                                           :name "WordDocument"
   1424                                           :type 2)))
   1425           (let ((fib (read-fib in)))
   1426             ;;(describe fib)
   1427             (let ((x (fib-fibRgFcLcb fib)))
   1428               (setq fcPlfLst (FibRgFcLcb97.fcPlfLst x)
   1429                     lcbPlfLst (FibRgFcLcb97.lcbPlfLst x)))
   1430             #+nil
   1431             (multiple-value-bind (fcPlfLst lcbPlfLst)
   1432                 (with-stream (s (vector-stream (subseq (fib-fibRgFcLcbBlob fib) #.(* 4 146))))
   1433                   (values (read-dword s) (read-dword s)))
   1434               (print (list :@@@ fcPlfLst lcbPlfLst))
   1435               )))
   1436         (with-stream (in (ole-entry-stream
   1437                           ole-file
   1438                           (find-ole-entry ole-file
   1439                                           :name '("0Table" "1Table") ;; TODO be sure which one?
   1440                                           :type 2)))
   1441           (stream-position in fcPlfLst)
   1442           (let ((PlfLst (read-PlfLst in)))
   1443             (let ((n 0))
   1444               (dotimes (i (length PlfLst))
   1445                 (incf n (if (LSTF.fSimpleList (aref PlfLst i)) 1 9)))
   1446               (let ((lvls (make-array n)))
   1447                 (dotimes (i n)
   1448                   (setf (aref lvls i) (read-lvl in)))
   1449                 ;; now I have lstf[] and lvl[]
   1450                 (let (anums ;; roughly like w:abstractNum
   1451                       (j 0))
   1452                   (dotimes (i (length PlfLst))
   1453                     (let ((lstf (aref PlfLst i)))
   1454                       (unless (LSTF.fSimpleList lstf)
   1455                         (push (list i #+nil lstf j) anums))
   1456                       (incf j (if (LSTF.fSimpleList lstf) 1 9))))
   1457                   (setq anums (nreverse anums))
   1458                   ;;(print anums)
   1459                   (dolist (a anums)
   1460                     (destructuring-bind (i j) a ;; i_lstf j_lvl0
   1461                       (declare (ignore i))
   1462                       (let* ((lvl (aref lvls (1+ j))) ;; hardcode second level
   1463                              (lvlf (LVL-lvlf lvl)))
   1464                         ;;(print (list :@@@ j (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf)))
   1465                         (push (LVLF.%physical-stream-position lvlf) offsets)))))
   1466                 #+nil
   1467                 (dotimes (i n)
   1468                   (let* ((lvl (aref lvls i))
   1469                          (lvlf (LVL-lvlf lvl)))
   1470                     (print (list :@@@ i (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf)))))))))
   1471         #+nil(values fcPlfLst lcbPlfLst)))
   1472     (let ((fixed (format nil "~a.fixed.doc" filename)))
   1473       (copy-file filename fixed)
   1474       ;;(print (list :@@@-offsets offsets))
   1475       (with-open-file (s fixed
   1476                          :direction :io
   1477                          :if-exists :overwrite
   1478                          :if-does-not-exist :error
   1479                          :element-type '(unsigned-byte 8))
   1480         (dolist (o offsets)
   1481           (stream-position s (+ 5 o))
   1482           (let ((flags (read-octet s)))
   1483             (stream-position s (+ 5 o))
   1484             (write-byte (logior #x08 flags) s)
   1485             #+nil(write-byte (logand #x07 flags) s))
   1486           (stream-position s (+ 26 o))
   1487           (write-byte 0 s))))))
   1488 
   1489 (defun extract-files (filename &optional (dir "/tmp"))
   1490   (with-stream (ole-file (ole-file-stream filename))
   1491     (do ((s (ole-directory-stream ole-file))
   1492          e
   1493          (i 0))
   1494         ((not (setq e (funcall s))))
   1495       (print-ole-entry e *standard-output*)
   1496       (terpri)
   1497       (ecase (ole-entry.object-type e)
   1498         ((0 1 5))
   1499         (2 (with-stream (in (ole-entry-stream ole-file e))
   1500              (with-open-file (out (format nil "~a/XX-~d" dir (incf i))
   1501                                   :direction :output
   1502                                   :if-does-not-exist :create
   1503                                   :if-exists :supersede
   1504                                   :element-type '(unsigned-byte 8))
   1505                (copy-stream in out))))))))
   1506 
   1507 ;;; MS-XLS Excel binary file
   1508 
   1509 (define-structure BIFFRecordHeader ()
   1510   (tag ushort)
   1511   (length ushort))
   1512 
   1513 (define-structure BIFF-ShortXLUnicodeString ()
   1514   (cch ubyte)
   1515   (%dummy ubyte :member '(0 1))
   1516   (fHighByte t :compute (not (zerop (logand 1 %dummy))))
   1517   (reserved1 t :compute (assert (zerop (logand #xfe %dummy))))
   1518   (rgb (ubyte (if fHighByte (* 2 cch) cch)))
   1519   (decoded t :compute (string-from-octets rgb fHighByte)))
   1520 
   1521 (define-structure BIFF-BoundSheet8 ()
   1522   (lbPlyPos dword)
   1523   (hsState ubyte :member '(0 1 2))
   1524   (dt ubyte :member '(0 1 2 6))
   1525   (stName BIFF-ShortXLUnicodeString))
   1526 
   1527 (define-structure BIFF-Cell ()
   1528   (rw ushort)
   1529   (col ushort)
   1530   (ixfe ushort))
   1531 
   1532 (define-structure BIFF-Blank ()
   1533   (cell BIFF-Cell))
   1534 
   1535 (define-structure BIFF-RkNumber ()
   1536   (%dummy dword)
   1537   (percent t :compute (not (zerop (logand 1 %dummy))))
   1538   (signed t :compute (not (zerop (logand 2 %dummy))))
   1539   (value t :compute (let ((y (if signed
   1540                                  (error "TODO") ;;(ash x -2)
   1541                                  (double-float-from-bits
   1542                                   (logand #xfffffffc %dummy) 0))))
   1543                       (if percent (/ y 100) y))))
   1544 
   1545 (define-structure BIFF-RkRec ()
   1546   (ixfe ushort)
   1547   (rk BIFF-RkNumber))
   1548 
   1549 (define-structure BIFF-RK ()
   1550   (rw ushort)
   1551   (col ushort)
   1552   (rkRec BIFF-RkRec))
   1553 
   1554 (define-structure BIFF-Bes ()
   1555   (bBoolErr ubyte)
   1556   (fError ubyte :member '(0 1))
   1557   (decoded t :compute (if (zerop fError)
   1558                           (ecase bBoolErr
   1559                             (0 nil)
   1560                             (1 t))
   1561                           (ecase fError
   1562                             (0 :#NULL!)
   1563                             (7 :#DIV/0!)
   1564                             (#xf :#VALUE!)
   1565                             (#x17 :#REF!)
   1566                             (#x1d :#NAME!)
   1567                             (#x24 :#NUM!)
   1568                             (#x2a :#N/A)
   1569                             (#x2b :#GETTING_DATA)))))
   1570 
   1571 (define-structure BIFF-BoolErr ()
   1572   (cell BIFF-Cell)
   1573   (bes BIFF-Bes))
   1574 
   1575 (define-structure BIFF-Number ()
   1576   (cell BIFF-Cell)
   1577   (num ulonglong)) ;; TODO double
   1578 
   1579 (define-structure BIFF-LabelSst ()
   1580   (cell BIFF-Cell)
   1581   (isst dword))
   1582 
   1583 (define-structure BIFF-FormulaValue () ;; TODO
   1584   (byte1 ubyte)
   1585   (byte2 ubyte)
   1586   (byte3 ubyte)
   1587   (byte4 ubyte)
   1588   (byte5 ubyte)
   1589   (byte6 ubyte)
   1590   (fExprO ushort))
   1591 
   1592 #+nil
   1593 (define-structure BIFF-CellParsedFormula () ;; TODO
   1594   (cce ushort)
   1595   (rgce (ubyte cce))
   1596   (rgcb BIFF-RgbExtra))
   1597 
   1598 #+nil
   1599 (define-structure BIFF-Formula () ;; TODO also probably wrong bit fiddling
   1600   (cell BIFF-Cell)
   1601   (val BIFF-FormulaValue)
   1602   (%dummy ushort)
   1603   (fAlwaysCalc t :compute (not (zerop (logand #x8000))))
   1604   (reserved1 t :compute (assert (zerop (logand #x4000))))
   1605   (fFill t :compute (not (zerop (logand #x2000))))
   1606   (fShrFmla t :compute (not (zerop (logand #x1000))))
   1607   (reserved2 t :compute (assert (zerop (logand #x800))))
   1608   (fClearErrors t :compute (not (zerop (logand #x400))))
   1609   (reserved3 t :compute (assert (zerop (logand #x3ff))))
   1610   (chn dword)
   1611   (formula BIFF-CellParsedFormula))
   1612 
   1613 #+nil
   1614 (define-structure BIFF-MulBlank () ;; TODO
   1615   (rw ushort)
   1616   (colFirst ushort))
   1617 
   1618 (define-structure BIFF-FormatRun ()
   1619   (ich ushort)
   1620   (ifnt ushort))
   1621 
   1622 (define-structure BIFF-LPWideString ()
   1623   (cchCharacters ushort)
   1624   (rgchData (wchar cchCharacters))
   1625   (decoded t :compute (string-from-wchars rgchData)))
   1626 
   1627 (define-structure BIFF-RPHSSub ()
   1628   (crun ushort)
   1629   (cch ushort)
   1630   (st BIFF-LPWideString))
   1631 
   1632 (define-structure BIFF-PhRuns ()
   1633   (ichFirst ushort) ;; TODO signed
   1634   (ichMom ushort)   ;; TODO signed
   1635   (cchMom ushort))  ;; TODO signed
   1636 
   1637 (define-structure BIFF-ExtRst ()
   1638   (reserved ushort)
   1639   (cb ushort)
   1640   (phs dword)
   1641   (rphssub BIFF-RPHSSub)
   1642   (rgphruns (BIFF-PhRuns (BIFF-RPHSSub.crun rphssub))))
   1643 
   1644 (defvar *fHighByte*) ;; nil|0|1 ;; TODO clean up nil|t vs nil|0|1
   1645 
   1646 (defun read-ustring (stream nchars fHighByte)
   1647   (let ((*fHighByte* fHighByte)
   1648         (b (make-array (* 2 nchars)
   1649                        :element-type 'character
   1650                        :fill-pointer 0)))
   1651     (dotimes (i nchars (coerce b 'string))
   1652       (vector-push-extend
   1653        (code-char (let ((c (ecase *fHighByte*
   1654                              (0 (read-octet stream))
   1655                              (1 (logior (read-octet stream)
   1656                                         (ash (read-octet stream) 8))))))
   1657                     (assert (plusp c))
   1658                     c))
   1659        b))))
   1660 
   1661 (define-structure BIFF-XLUnicodeRichExtendedString ()
   1662   (cch ushort)
   1663   (%dummy ubyte)
   1664   (fHighByte t :compute (logand 1 %dummy))
   1665   (reserved1 t :compute (assert (zerop (logand 2 %dummy))))
   1666   (fExtSt t :compute (not (zerop (logand 4 %dummy))))
   1667   (fRichSt t :compute (not (zerop (logand 8 %dummy))))
   1668   (reserved2 t :compute (assert (zerop (logand #xf0 %dummy))))
   1669   (cRun ushort :when fRichSt :default 0)
   1670   (cbExtRst dword :when fExtSt :default 0)
   1671   (rgb t :compute (read-ustring stream cch fHighByte))
   1672   (rgRun (BIFF-FormatRun cRun) :when fRichSt :default #())
   1673   (ExtRst (BIFF-ExtRst cbExtRst) :when fExtSt :default #()))
   1674 
   1675 (define-structure BIFF-SST ()
   1676   (cstTotal dword)
   1677   (cstUnique dword)
   1678   (rgb (BIFF-XLUnicodeRichExtendedString cstUnique)))
   1679 
   1680 (define-structure BIFF-DefColWidth ()
   1681   (cchdefColWidth ushort))
   1682 
   1683 (define-structure BIFF-Index ()
   1684   (reserved dword :always 0)
   1685   (rwMic dword)
   1686   (rwMac dword)
   1687   (ibXF dword)
   1688   (rgibRw (dword 1)))
   1689 
   1690 (defun biff-continue-stream (stream size)
   1691   ;; like SHORTER-STREAM but makes continue records transparent
   1692   (let ((offset 0)
   1693         self)
   1694     (setq self
   1695           (lambda (msg)
   1696             (assert stream)
   1697             (ecase msg
   1698               (close (setq stream nil))
   1699               (stream-position offset)
   1700               (physical-stream-position (physical-stream-position stream))
   1701               (read-octet
   1702                (unless (< offset size)
   1703                  (when (eql #x3c (read-ushort stream)) ;; continue record
   1704                    (let ((n (read-ushort stream)))
   1705                      (assert (< 0 n 8225)) ;; TODO biff8 or 2081 biff2-5
   1706                      (incf size n)
   1707                      (when *fHighByte*
   1708                        (setq *fHighByte* (logand 1 (read-octet stream)))
   1709                        (decf size)))))
   1710                (unless (< offset size)
   1711                  (error 'end-of-file :stream self))
   1712                (incf offset)
   1713                (read-octet stream)))))))
   1714 
   1715 (defun biff-substream (ole-entry-stream)
   1716   (let ((in ole-entry-stream)
   1717         end
   1718         eof)
   1719     (flet ((header ()
   1720              (let* ((h (read-BIFFRecordHeader in))
   1721                     (nbytes (BIFFRecordHeader.length h)))
   1722                (setq end (+ (stream-position in) nbytes))
   1723                (values (BIFFRecordHeader.tag h)
   1724                        (biff-continue-stream in nbytes)))))
   1725       (assert (member (header) '(#x0009 #x0209 #x0409 #x0809))) ;; bof
   1726       (lambda ()
   1727         (assert (not eof))
   1728         (stream-position in end)
   1729         (multiple-value-bind (tag s) (header)
   1730           (case tag ;; TODO more cell types
   1731             (#x000a (not (setq eof t)))
   1732             ;;(#x000b :index1)
   1733             (#x0085 (read-BIFF-BoundSheet8 s))
   1734             (#x00fc (let (*fHighByte*) (read-BIFF-SST s)))
   1735             (#x00fd (read-BIFF-LabelSst s))
   1736             ;;(#x020b (read-BIFF-Index s))
   1737             (#x027e (read-BIFF-Rk s))
   1738             (t tag)))))))
   1739 
   1740 (defun princ-cell-value (x sst)
   1741   (typecase x
   1742     (BIFF-LabelSst
   1743      (let ((c (BIFF-LabelSst.cell x)))
   1744        `(:label ,(BIFF-Cell.rw c)
   1745                 ,(BIFF-Cell.col c)
   1746                 ,(BIFF-XLUnicodeRichExtendedString.rgb
   1747                   (aref (BIFF-SST.rgb sst) (BIFF-LabelSst.isst x))))))
   1748     (BIFF-RK
   1749      `(:number ,(BIFF-RK.rw x)
   1750                ,(BIFF-RK.col x)
   1751                ,(BIFF-RkNumber.value (BIFF-RkRec.rk (BIFF-RK.RkRec x)))))))
   1752 
   1753 (defun parse-sheet (BIFF-BoundSheet8 stream sst)
   1754   (stream-position stream (BIFF-BoundSheet8.lbPlyPos BIFF-BoundSheet8))
   1755   (do (z x (s (biff-substream stream)))
   1756       ((not (setq x (funcall s)))
   1757        (nreverse z))
   1758     (let ((v (princ-cell-value x sst)))
   1759       (when v
   1760         (push v z))))
   1761   #+nil
   1762   (let ((index (funcall (biff-substream stream))))
   1763     (etypecase index
   1764       (BIFF-Index index #+nil(BIFF-Index.rgibRw )))))
   1765 
   1766 (defun parse-xls-file (filename)
   1767   (with-stream (f (ole-file-stream filename))
   1768     (let ((e (find-ole-entry f :name "Workbook" :type 2)))
   1769       (when e
   1770         (with-stream (in (ole-entry-stream f e))
   1771           (let (sheets sst)
   1772             (do (x (globals (biff-substream in)))
   1773                 ((not (setq x (funcall globals)))
   1774                  (setq sheets (nreverse sheets)))
   1775               (typecase x
   1776                 (BIFF-BoundSheet8 (push x sheets))
   1777                 (BIFF-SST (setq sst x))))
   1778             `(:workbook
   1779               ,@(loop
   1780                    for x in sheets
   1781                    collect `(:sheet
   1782                              ,(BIFF-ShortXLUnicodeString.decoded
   1783                                (BIFF-BoundSheet8.stName x))
   1784                              ,@(parse-sheet x in sst))))))))))