emacs-unoffice

Emacs library to reclaim text from office documents (abw, odt, docx).
Log | Files | Refs

commit 676dda1a90f1c26e24d0ac4c996ebcaef054a6a5
parent 55e217c66130408173c198bab85315682dbbafe5
Author: Tomas Hlavaty <tom@logand.com>
Date:   Fri,  5 Jun 2020 23:23:56 +0200

add emacs-unoffice.el

Diffstat:
A emacs-unoffice.el | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+), 0 deletions(-)

;;; -*- lexical-binding: t -*-
;;;
;;; emacs-unoffice.el
;;;
;;; Reclaim text from office documents (abw, odt, docx).
;;;
;;; Copyright (C) 2020 Tomas Hlavaty <tom at logand dot com>
;;;
;;; License: GPLv3 or later
;;;
;;; Download: git clone https://logand.com/git/emacs-unoffice.git
;;;
;;; Example configuration:
;;;
;;; ;; The feature is named `unoffice' while the file is
;;; ;; emacs-unoffice.el, so the file name is given explicitly.
;;; (require 'unoffice "emacs-unoffice")
;;; (add-to-list 'auto-mode-alist '("\\.abw\\'" . unabw))
;;; (add-to-list 'auto-mode-alist '("\\.docx\\'" . undocx))
;;; (add-to-list 'auto-mode-alist '("\\.odt\\'" . unodt))

(require 'cl-lib)    ; cl-labels, cl-case, cl-etypecase
(require 'xml)       ; xml-parse-file, xml-parse-region
(require 'arc-mode)  ; archive-zip-extract

(defun unabw ()
  "Replace the buffer text with the text of the abw file it visits.
The file named by `buffer-file-name' is parsed with `xml-parse-file'.
The children of every `c' element are inserted, and a newline is
inserted after every `p' element.  Afterwards the buffer is made
read-only and point is moved to the beginning of the buffer."
  (interactive)
  (with-silent-modifications
    (erase-buffer)
    (cl-labels ((rec (x)
                  ;; Only element nodes (conses) are walked here; the
                  ;; strings that get inserted are those directly
                  ;; under a `c' element.
                  (when (consp x)
                    (cl-case (car x)
                      (p (mapc #'rec (cddr x)) (insert "\n"))
                      ;; `insert' accepts only strings and characters,
                      ;; so a child that is itself an element is walked
                      ;; instead of being passed to `insert'.
                      (c (dolist (child (cddr x))
                           (if (stringp child)
                               (insert child)
                             (rec child))))
                      (t (mapc #'rec (cddr x)))))))
      (rec (car (xml-parse-file buffer-file-name)))))
  (setq buffer-read-only t)
  (goto-char (point-min)))

(defun unodt ()
  "Replace the buffer text with the text of the odt file it visits.
The member \"content.xml\" is extracted from the file named by
`buffer-file-name' with `archive-zip-extract' and parsed with
`xml-parse-region'.  Every string in the parsed tree is inserted,
and a newline is inserted after every `text:p' element.  If the
extraction does not return zero, nothing is inserted.  Afterwards
the buffer is made read-only and point is moved to the beginning
of the buffer."
  (interactive)
  (with-silent-modifications
    (erase-buffer)
    (cl-labels ((rec (x)
                  (cl-etypecase x
                    (null)
                    (string (insert x))
                    (cons
                     (cl-case (car x)
                       (text:p (mapc #'rec (cddr x)) (insert "\n"))
                       (t (mapc #'rec (cddr x))))))))
      (rec
       ;; The argument is evaluated in a temporary buffer; `rec' then
       ;; inserts into the original buffer.
       (let ((f buffer-file-name))
         (with-temp-buffer
           (when (zerop (archive-zip-extract f "content.xml"))
             (car (xml-parse-region))))))))
  (setq buffer-read-only t)
  (goto-char (point-min)))

(defun undocx ()
  "Replace the buffer text with the text of the docx file it visits.
The member \"word/document.xml\" or, if that extraction does not
return zero, \"document.xml\" is extracted from the file named by
`buffer-file-name' with `archive-zip-extract' and parsed with
`xml-parse-region'.  The children of every `w:t' element are
inserted.  Two newlines are inserted after every `w:p' element
that produced some text.  Afterwards the buffer is made read-only
and point is moved to the beginning of the buffer."
  (interactive)
  (with-silent-modifications
    (erase-buffer)
    ;; WROTE is non-nil while the current `w:p' has produced text; it
    ;; keeps empty paragraphs from adding blank lines.
    (let (wrote)
      (cl-labels ((ins (x)
                    (when (and x (not (equal "" x)))
                      (insert x)
                      (setq wrote t)))
                  (rec (x)
                    (when (consp x)
                      (cl-case (car x)
                        (w:p (mapc #'rec (cddr x))
                             (when wrote
                               (ins "\n\n")
                               (setq wrote nil)))
                        (w:t (mapc #'ins (cddr x)))
                        (t (mapc #'rec (cddr x)))))))
        (rec
         (let ((f buffer-file-name))
           (with-temp-buffer
             (when (or (zerop (archive-zip-extract f "word/document.xml"))
                       (zerop (archive-zip-extract f "document.xml")))
               (car (xml-parse-region)))))))))
  (setq buffer-read-only t)
  (goto-char (point-min)))

(provide 'unoffice)