Mercurial > hg > config
annotate python/html2flux.py @ 295:e2564dd51334
slowly slowly, step by step...
| author | Jeff Hammel <jhammel@mozilla.com> |
|---|---|
| date | Sat, 11 May 2013 04:16:56 -0700 |
| parents | a0d830fd8a42 |
| children | ee3c1b65d6d1 |
| rev | line source |
|---|---|
|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
2 |
| 292 | 3 """ |
| 4 transform an HTML <dl> file into a fluxbox menu | |
| 5 if no file is given, read from stdin | |
| 6 | |
| 7 <dl><a>submenu name</a> | |
| 8 <dt>program label</dt><dd>command</dd> | |
| 9 <dt>another program label</dt><dd>command2</dd> | |
| 10 </dl> | |
| 11 | |
| 12 x-form -> internal format: | |
| 13 | |
| 14 ('submenu name': [('program label', 'command'), | |
| 15 ('another program label', 'command2')]) | |
| 16 """ | |
| 17 | |
| 18 import optparse | |
| 294 | 19 import os |
|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
20 import sys |
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
21 from lxml import etree |
| 292 | 22 from lsex import lsex # local import |
|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
23 |
| 292 | 24 # available executables |
|
45
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# names (final path component) of every executable reported by lsex()
executables = set(path.rsplit('/', 1)[-1] for path in lsex())
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
26 |
def readmenu(dl, output, top=True):
    """
    read a menu from a <dl> element

    - dl: element whose children (<a>, <dt>, <dd>, nested <dl>)
      describe the menu
    - output: not used here; only handed on to the recursive calls
    - top: True for the outermost menu, which never takes a name

    returns (name, menu_items); each item is (label, command) or,
    for a nested <dl>, the (name, menu_items) pair of that submenu
    """

    menu_items = []
    name = None # menu name
    firstchild = True
    label = None # text of the most recent <dt>

    # iterating an element yields its direct children in document order
    for child in dl:

        if callable(child.tag):
            # comments and processing instructions carry a factory
            # function instead of a string tag; they are not menu content
            # and must not count as the first child
            continue

        if firstchild:
            # only an <a> in first position names a submenu
            firstchild = False
            if not top and child.tag == 'a':
                # TODO: better way of labeling this!
                name = (child.text or '').strip() # an empty <a/> has no text

        if child.tag == 'dt':
            # item label
            label = ' '.join([i.strip() for i in child.itertext() if i.strip()])

        elif child.tag == 'dd':
            # command
            command = ' '.join([i.strip() for i in child.itertext() if i.strip()])
            # TODO: classes
            words = command.split()
            if not words:
                # empty <dd>: there is no command to look up
                continue
            executable = words[0]
            # keep only commands that can be run: an absolute path,
            # or a name among the available executables
            if os.path.isabs(executable) or executable in executables:
                menu_items.append((label, command))

        elif child.tag == 'dl':
            # submenu
            menu_items.append(readmenu(child, output, top=False))

    return (name, menu_items)
| 292 | 55 |
try:
    _string_types = basestring # Python 2: matches both str and unicode
except NameError:
    _string_types = str # Python 3 has no basestring


def printflux(name, menu, output, top=True):
    """
    write a menu in fluxbox menu syntax

    - name: submenu title; None is written as an empty title
    - menu: list of (label, command) and (name, submenu) pairs
    - output: file-like object for writing
    - top: True for the outermost menu, whose items are written
      without an enclosing [submenu] ... [end] pair
    """

    # the header and its closing tag are written together or not at all,
    # so the output is always balanced
    if not top:
        output.write('[submenu] (%s)\n' % (name or ''))

    for label, item in menu:
        if isinstance(item, _string_types):
            # command
            output.write('[exec] (%s) {%s}\n' % (label, item))
        else:
            # submenu
            printflux(label, item, output, top=False)

    if not top:
        output.write('[end]\n')
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
71 |
def printmenu(dl, output):
    """
    read the menu under `dl` and write it to `output`

    - dl: <dl> element handed to readmenu
    - output: file-like object handed to readmenu and printflux
    """
    parsed = readmenu(dl, output)
    printflux(parsed[0], parsed[1], output)
| 293 | 75 |
def main(args=sys.argv[1:]):
    """
    command line interface

    - args: command line arguments, without the program name
    """

    # parse command line options
    usage = '%prog [options] [menu.html]'
    parser = optparse.OptionParser(usage=usage,
                                   description=__doc__)
    parser.add_option('--collapse', dest='collapse',
                      action='store_true', default=False,
                      help="collapse menus with a single item to that item")
    parser.add_option('-o', '--output', dest='output',
                      help="output file [Default: <stdout>]")
    options, args = parser.parse_args(args)
    # TODO: --collapse is accepted but nothing acts on it yet

    # read the input; a named file is closed as soon as it has been read
    if args:
        with open(args[0]) as htmlfile:
            html = htmlfile.read()
    else:
        html = sys.stdin.read()

    # get first <dl> element; '.' + '//dl' only searches below the root,
    # so a document whose root is the <dl> itself is handled explicitly
    dom = etree.fromstring(html)
    dl = dom if dom.tag == 'dl' else dom.find('.//dl')
    if dl is None:
        # prints the usage message and exits with status 2
        parser.error("no <dl> element found in input")

    # write the menu; the output file is opened only after the input
    # parsed, so a bad input does not truncate an existing file
    if options.output:
        with open(options.output, 'w') as fluxout:
            printmenu(dl, fluxout)
    else:
        printmenu(dl, sys.stdout)
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
104 |
|
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
Jeff Hammel <k0scist@gmail.com>
parents:
diff
changeset
|
# run the command line interface only when executed as a script
if __name__ == "__main__":
    main()
