picolisp

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/picolisp.git/
Log | Files | Refs | README | LICENSE

scrape.l (5903B)


      1 # 31jul13abu
      2 # (c) Software Lab. Alexander Burger
      3 
      4 # *ScrHost *ScrPort *ScrGate *Title *Expect *Found
      5 # *Links *Forms *Buttons *Fields *Errors
      6 
      7 # Scrape HTML form(s)
      8 (de scrape (Host Port How)
      9    (client (setq *ScrHost Host) (setq *ScrPort Port) How
     10       (off *ScrGate *Links *Forms *Buttons *Fields *Errors)
     11       (while
     12          (from
     13             "303 See Other"
     14             "<title>"
     15             "<base href=\"http://"
     16             "<a href=\""
     17             " action=\""
     18             "<input type=\"submit\" name=\""
     19             "<input type=\"hidden\" name=\""
     20             "<input type=\"text\" name=\""
     21             "<input type=\"password\" name=\""
     22             "<select name=\""
     23             "<option selected=\"selected\">"
     24             "<textarea name=\""
     25             "<span id=\""
     26             "<div class=\"error\">"
     27             *Expect )
     28          (casq @
     29             ("303 See Other"
     30                (when (from "Location: http://")
     31                   (let L (split (line) ':)
     32                      (if (cdr L)
     33                         (scrape
     34                            (pack (pop 'L))
     35                            (ifn (format (car (setq L (split (car L) '/))))
     36                               80
     37                               (pop 'L)
     38                               @ )
     39                            (glue '/ L) )
     40                         (setq L (split (car L) '/))
     41                         (scrape (pack (pop 'L)) 80 (glue '/ L)) ) ) ) )
     42             ("<title>"
     43                (setq *Title (ht:Pack (till "<"))) )
     44             ("<base href=\"http://"
     45                (let L (split (till "\"") ':)
     46                   (if (cdr L)
     47                      (setq
     48                         *ScrHost (pack (pop 'L))
     49                         *ScrPort (format (cdr (rot (car L)))) )
     50                      (setq *ScrGate (pack (cdr (member '/ (car L))))) ) ) )
     51             ("<a href=\""
     52                (let Url (pack *ScrGate (till "\"" T))
     53                   (from ">")
     54                   (cond
     55                      ((till "<")
     56                         (queue '*Links (cons (ht:Pack @) Url)) )
     57                      ((= "<img" (till " " T))
     58                         (from "alt=\"")
     59                         (queue '*Links (cons (ht:Pack (till "\"")) Url)) ) ) ) )
     60             (" action=\""
     61                (queue '*Forms  # (action . fields)
     62                   (list (pack *ScrGate (till "\"" T))) ) )
     63             ("<input type=\"submit\" name=\""
     64                (let Nm (till "\"" T)
     65                   (from "value=\"")
     66                   (queue '*Buttons  # (label field . form)
     67                      (cons
     68                         (ht:Pack (till "\""))
     69                         (cons Nm T)
     70                         (last *Forms) ) ) ) )
     71             ("<input type=\"hidden\" name=\""
     72                (conc (last *Forms)
     73                   (cons
     74                      (cons (till "\"" T)
     75                         (prog (from "value=\"") (ht:Pack (till "\"")))) ) ) )
     76             (("<input type=\"text\" name=\"" "<input type=\"password\" name=\"")
     77                (conc (last *Forms)
     78                   (cons
     79                      (queue '*Fields
     80                         (cons (till "\"" T)
     81                            (prog (from "value=\"") (ht:Pack (till "\"")))) ) ) ) )
     82             ("<select name=\""
     83                (conc (last *Forms)
     84                   (cons
     85                      (queue '*Fields (cons (till "\"" T))) ) ) )
     86             ("<option selected=\"selected\">"
     87                (con (last *Fields) (ht:Pack (till "<"))) )
     88             ("<textarea name=\""
     89                (conc (last *Forms)
     90                   (cons
     91                      (queue '*Fields
     92                         (cons (till "\"" T)
     93                            (prog (from ">") (ht:Pack (till "<"))) ) ) ) ) )
     94             ("<span id=\""
     95                (from ">")
     96                (queue '*Fields (ht:Pack (till "<"))) )
     97             ("<div class=\"error\">"
     98                (queue '*Errors (ht:Pack (till "<"))) )
     99             (T  (on *Found)) ) )
    100       (or *Errors *Title) ) )
    101 
    102 # Expect content
    103 (de expect (*Expect . "Prg")
    104    (let *Found NIL
    105       (run "Prg")
    106       (unless *Found
    107          (quit "Content not found" *Expect) ) ) )
    108 
    109 # Click on a link
    110 (de click (Lbl Cnt)
    111    (let L (cdr (target *Links Lbl Cnt))
    112       (when (pre? "http://" L)
    113          (setq
    114             L (split (nth (chop L) 8) '/ ':)
    115             *ScrHost (pack (pop 'L))
    116             *ScrPort (ifn (format (car L)) 80 (pop 'L) @)
    117             L (glue '/ L) ) )
    118       (scrape *ScrHost *ScrPort L) ) )
    119 
    120 # Press a button
    121 (de press (Lbl Cnt)
    122    (let B (target *Buttons Lbl Cnt)
    123       (scrape *ScrHost *ScrPort
    124          (cons
    125             (caddr B)
    126             (glue "&"
    127                (mapcar
    128                   '((X)
    129                      (list (car X) '= (ht:Fmt (cdr X))) )
    130                   (cons (cadr B) (cdddr B)) ) ) ) ) ) )
    131 
    132 # Retrieve a field's value
    133 (de value (Fld Cnt)
    134    (fin (field Fld Cnt)) )
    135 
    136 # Set a field's value
    137 (de enter (Fld Str Cnt)
    138    (con (field Fld Cnt) Str) )
    139 
    140 # Inspect current page
    141 (de display ()
    142    (prinl "###############")
    143    (apply println (mapcar car *Links) 'click)
    144    (prinl)
    145    (apply println (mapcar car *Buttons) 'press)
    146    (prinl)
    147    (apply println (trim (mapcar fin *Fields)) 'value)
    148    (prinl)
    149    *Title )
    150 
    151 ### Utility functions ###
    152 (de target (Lst Lbl Cnt)
    153    (cond
    154       ((num? Lbl)
    155          (get Lst Lbl) )
    156       ((pair Lbl) Lbl)
    157       (T
    158          (default Cnt 1)
    159          (or
    160             (find
    161                '((L)
    162                   (and
    163                      (pre? Lbl (car L))
    164                      (=0 (dec 'Cnt)) ) )
    165                Lst )
    166             (quit "Target not found" Lbl) ) ) ) )
    167 
    168 (de field (Fld Cnt)
    169    (or
    170       (cond
    171          ((gt0 Fld)
    172             (get *Fields Fld) )
    173          ((lt0 Fld)
    174             (get *Fields (+ (length *Fields) Fld 1)) )
    175          (T (assoc Fld (cdr (get *Forms (or Cnt 1))))) )
    176       (quit "Field not found" Fld) ) )
    177 
    178 # vi:et:ts=3:sw=3