mirror of https://github.com/interlegis/sapl.git
Marcio Mazza
10 years ago
committed by
Marcio Mazza
2 changed files with 29 additions and 0 deletions
@ -0,0 +1,28 @@ |
|||
import string |
|||
from bs4 import BeautifulSoup |
|||
from itertools import chain |
|||
|
|||
|
|||
def _label_from_td(td): |
|||
return td.text.strip().split('\n')[0].strip(u'\xa0' + string.whitespace) |
|||
|
|||
|
|||
def get_labels(filename, flat=True):
    """Extract labels from a file containing the html source of a rendered
    legacy sapl form.

    The file is read as UTF-8 and parsed with BeautifulSoup's
    ``html.parser``. The single ``<form>`` whose ``method`` attribute is
    ``post`` is selected, and one label is taken from every ``<td>`` of
    every ``<tr>`` inside it (see ``_label_from_td``). Each row of labels is
    also printed as a comma-separated list of ``u'...'`` literals.

    Args:
        filename: path of the html file to read.
        flat: when true (the default) return a single flat list of labels;
            otherwise return one list of labels per table row.

    Returns:
        A list of labels, or a list of per-row label lists if ``flat`` is
        false.

    Raises:
        ValueError: if the document does not contain exactly one form with
            ``method="post"`` (raised by the single-element unpacking).
    """
    # Imported locally so this function does not depend on the module's
    # import block; io.open decodes on read under both Python 2 and 3,
    # whereas calling .decode() on the result of a text-mode read fails
    # on Python 3.
    import io

    with io.open(filename, 'r', encoding='utf-8') as f:
        html_doc = f.read()

    soup = BeautifulSoup(html_doc, 'html.parser')

    # Unpacking into a one-element list asserts that exactly one post form
    # exists in the document.
    [form] = [candidate for candidate in soup.find_all('form')
              if candidate.get('method') == u'post']

    labels = [[_label_from_td(td) for td in tr.find_all('td')]
              for tr in form.find_all('tr')]

    for line in labels:
        # Single-argument parenthesised form is valid as both the Python 2
        # print statement and the Python 3 print function.
        print(', '.join("u'%s'" % label for label in line))

    if flat:
        return list(chain.from_iterable(labels))
    return labels
|||
|
Loading…
Reference in new issue