scrape.l (5903B)
1 # 31jul13abu 2 # (c) Software Lab. Alexander Burger 3 4 # *ScrHost *ScrPort *ScrGate *Title *Expect *Found 5 # *Links *Forms *Buttons *Fields *Errors 6 7 # Scrape HTML form(s) 8 (de scrape (Host Port How) 9 (client (setq *ScrHost Host) (setq *ScrPort Port) How 10 (off *ScrGate *Links *Forms *Buttons *Fields *Errors) 11 (while 12 (from 13 "303 See Other" 14 "<title>" 15 "<base href=\"http://" 16 "<a href=\"" 17 " action=\"" 18 "<input type=\"submit\" name=\"" 19 "<input type=\"hidden\" name=\"" 20 "<input type=\"text\" name=\"" 21 "<input type=\"password\" name=\"" 22 "<select name=\"" 23 "<option selected=\"selected\">" 24 "<textarea name=\"" 25 "<span id=\"" 26 "<div class=\"error\">" 27 *Expect ) 28 (casq @ 29 ("303 See Other" 30 (when (from "Location: http://") 31 (let L (split (line) ':) 32 (if (cdr L) 33 (scrape 34 (pack (pop 'L)) 35 (ifn (format (car (setq L (split (car L) '/)))) 36 80 37 (pop 'L) 38 @ ) 39 (glue '/ L) ) 40 (setq L (split (car L) '/)) 41 (scrape (pack (pop 'L)) 80 (glue '/ L)) ) ) ) ) 42 ("<title>" 43 (setq *Title (ht:Pack (till "<"))) ) 44 ("<base href=\"http://" 45 (let L (split (till "\"") ':) 46 (if (cdr L) 47 (setq 48 *ScrHost (pack (pop 'L)) 49 *ScrPort (format (cdr (rot (car L)))) ) 50 (setq *ScrGate (pack (cdr (member '/ (car L))))) ) ) ) 51 ("<a href=\"" 52 (let Url (pack *ScrGate (till "\"" T)) 53 (from ">") 54 (cond 55 ((till "<") 56 (queue '*Links (cons (ht:Pack @) Url)) ) 57 ((= "<img" (till " " T)) 58 (from "alt=\"") 59 (queue '*Links (cons (ht:Pack (till "\"")) Url)) ) ) ) ) 60 (" action=\"" 61 (queue '*Forms # (action . fields) 62 (list (pack *ScrGate (till "\"" T))) ) ) 63 ("<input type=\"submit\" name=\"" 64 (let Nm (till "\"" T) 65 (from "value=\"") 66 (queue '*Buttons # (label field . form) 67 (cons 68 (ht:Pack (till "\"")) 69 (cons Nm T) 70 (last *Forms) ) ) ) ) 71 ("<input type=\"hidden\" name=\"" 72 (conc (last *Forms) 73 (cons 74 (cons (till "\"" T) 75 (prog (from "value=\"") (ht:Pack (till "\"")))) ) ) ) 76 (("<input type=\"text\" name=\"" "<input type=\"password\" name=\"") 77 (conc (last *Forms) 78 (cons 79 (queue '*Fields 80 (cons (till "\"" T) 81 (prog (from "value=\"") (ht:Pack (till "\"")))) ) ) ) ) 82 ("<select name=\"" 83 (conc (last *Forms) 84 (cons 85 (queue '*Fields (cons (till "\"" T))) ) ) ) 86 ("<option selected=\"selected\">" 87 (con (last *Fields) (ht:Pack (till "<"))) ) 88 ("<textarea name=\"" 89 (conc (last *Forms) 90 (cons 91 (queue '*Fields 92 (cons (till "\"" T) 93 (prog (from ">") (ht:Pack (till "<"))) ) ) ) ) ) 94 ("<span id=\"" 95 (from ">") 96 (queue '*Fields (ht:Pack (till "<"))) ) 97 ("<div class=\"error\">" 98 (queue '*Errors (ht:Pack (till "<"))) ) 99 (T (on *Found)) ) ) 100 (or *Errors *Title) ) ) 101 102 # Expect content 103 (de expect (*Expect . "Prg") 104 (let *Found NIL 105 (run "Prg") 106 (unless *Found 107 (quit "Content not found" *Expect) ) ) ) 108 109 # Click on a link 110 (de click (Lbl Cnt) 111 (let L (cdr (target *Links Lbl Cnt)) 112 (when (pre? "http://" L) 113 (setq 114 L (split (nth (chop L) 8) '/ ':) 115 *ScrHost (pack (pop 'L)) 116 *ScrPort (ifn (format (car L)) 80 (pop 'L) @) 117 L (glue '/ L) ) ) 118 (scrape *ScrHost *ScrPort L) ) ) 119 120 # Press a button 121 (de press (Lbl Cnt) 122 (let B (target *Buttons Lbl Cnt) 123 (scrape *ScrHost *ScrPort 124 (cons 125 (caddr B) 126 (glue "&" 127 (mapcar 128 '((X) 129 (list (car X) '= (ht:Fmt (cdr X))) ) 130 (cons (cadr B) (cdddr B)) ) ) ) ) ) ) 131 132 # Retrieve a field's value 133 (de value (Fld Cnt) 134 (fin (field Fld Cnt)) ) 135 136 # Set a field's value 137 (de enter (Fld Str Cnt) 138 (con (field Fld Cnt) Str) ) 139 140 # Inspect current page 141 (de display () 142 (prinl "###############") 143 (apply println (mapcar car *Links) 'click) 144 (prinl) 145 (apply println (mapcar car *Buttons) 'press) 146 (prinl) 147 (apply println (trim (mapcar fin *Fields)) 'value) 148 (prinl) 149 *Title ) 150 151 ### Utility functions ### 152 (de target (Lst Lbl Cnt) 153 (cond 154 ((num? Lbl) 155 (get Lst Lbl) ) 156 ((pair Lbl) Lbl) 157 (T 158 (default Cnt 1) 159 (or 160 (find 161 '((L) 162 (and 163 (pre? Lbl (car L)) 164 (=0 (dec 'Cnt)) ) ) 165 Lst ) 166 (quit "Target not found" Lbl) ) ) ) ) 167 168 (de field (Fld Cnt) 169 (or 170 (cond 171 ((gt0 Fld) 172 (get *Fields Fld) ) 173 ((lt0 Fld) 174 (get *Fields (+ (length *Fields) Fld 1)) ) 175 (T (assoc Fld (cdr (get *Forms (or Cnt 1))))) ) 176 (quit "Field not found" Fld) ) ) 177 178 # vi:et:ts=3:sw=3